cmd/segment-reaper: Remove segments of skipped objects

Reorganize some operations in the observer.processSegment method:

* to slightly reduce the memory usage by removing the segments of the
  objects marked as skipped. Skipped objects are left out of the
  analysis stage, so their segments aren't needed.
* to return earlier when an object is skipped, because no further
  processing is needed.

Change-Id: I210a26c394477ee411ff7f640507dcc07733a47f
This commit is contained in:
Ivan Fraixedes 2020-05-11 19:10:38 +02:00
parent 9314092841
commit c58dccb7a8
No known key found for this signature in database
GPG Key ID: 042B474597F96DB7
2 changed files with 27 additions and 23 deletions

View File

@ -95,7 +95,7 @@ func (obsvr *observer) Object(ctx context.Context, path metainfo.ScopedPath, poi
// processSegment aggregates, in the observer internal state, the objects that
// belong the same project, tracking their segments indexes and aggregated
// information of them for calling analyzeProject method, before a new project
// list of object segments list starts and the internal status is reset.
// list of object segments starts and its internal status is reset.
//
// It also aggregates some stats about all the segments independently of the
// object to which belong.
@ -115,9 +115,33 @@ func (obsvr *observer) processSegment(ctx context.Context, path metainfo.ScopedP
}
obsvr.lastProjectID = path.ProjectIDString
isLastSegment := path.Segment == "l"
// collect number of pointers for reporting
if pointer.Type == pb.Pointer_INLINE {
obsvr.inlineSegments++
if isLastSegment {
obsvr.lastInlineSegments++
}
} else {
obsvr.remoteSegments++
}
object := findOrCreate(path.BucketName, path.EncryptedObjectPath, obsvr.objects)
if obsvr.from != nil && pointer.CreationDate.Before(*obsvr.from) {
object.skip = true
// release the memory consumed by the segments because it won't be used
// for skip objects
object.segments = nil
return nil
} else if obsvr.to != nil && pointer.CreationDate.After(*obsvr.to) {
object.skip = true
// release the memory consumed by the segments because it won't be used
// for skip objects
object.segments = nil
return nil
}
if isLastSegment {
object.hasLastSegment = true
@ -150,22 +174,6 @@ func (obsvr *observer) processSegment(ctx context.Context, path metainfo.ScopedP
}
}
if obsvr.from != nil && pointer.CreationDate.Before(*obsvr.from) {
object.skip = true
} else if obsvr.to != nil && pointer.CreationDate.After(*obsvr.to) {
object.skip = true
}
// collect number of pointers for report
if pointer.Type == pb.Pointer_INLINE {
obsvr.inlineSegments++
if isLastSegment {
obsvr.lastInlineSegments++
}
} else {
obsvr.remoteSegments++
}
return nil
}
@ -213,7 +221,6 @@ func (obsvr *observer) findZombieSegments(object *object) error {
segmentsCount := object.segments.Count()
// using 'expectedNumberOfSegments-1' because 'segments' doesn't contain last segment
switch {
// this case is only for old style pointers with encrypted number of segments
// value 0 means that we don't know how much segments object should have
@ -229,6 +236,7 @@ func (obsvr *observer) findZombieSegments(object *object) error {
obsvr.appendSegment(index)
}
}
// using 'expectedNumberOfSegments-1' because 'segments' doesn't contain last segment
case segmentsCount > object.expectedNumberOfSegments-1:
sequenceLength := firstSequenceLength(object.segments)

View File

@ -225,8 +225,6 @@ func TestObserver_processSegment(t *testing.T) {
require.Equal(t, 1, len(obsvr.objects[bucketName]), "objects in object map")
require.Contains(t, obsvr.objects[bucketName], objPath, "path in bucket objects map")
obj := obsvr.objects[bucketName][objPath]
assert.Equal(t, 1, obj.expectedNumberOfSegments, "Object.expectedNumSegments")
assert.True(t, obj.hasLastSegment, "Object.hasLastSegment")
assert.True(t, obj.skip, "Object.skip")
// Assert observer global stats
@ -344,8 +342,6 @@ func TestObserver_processSegment(t *testing.T) {
require.Equal(t, 1, len(obsvr.objects[bucketName]), "objects in object map")
require.Contains(t, obsvr.objects[bucketName], objPath, "path in bucket objects map")
obj := obsvr.objects[bucketName][objPath]
assert.Equal(t, 1, obj.expectedNumberOfSegments, "Object.expectedNumSegments")
assert.True(t, obj.hasLastSegment, "Object.hasLastSegment")
assert.True(t, obj.skip, "Object.skip")
// Assert observer global stats