// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.

package main

import (
	"context"
	"encoding/base64"
	"encoding/csv"
	"strconv"
	"time"

	"github.com/zeebo/errs"

	"storj.io/common/pb"
	"storj.io/common/storj"
	"storj.io/storj/satellite/metainfo"
)

const (
	lastSegment = int(-1)
	rateLimit   = 0
)

// object represents an object with its segments.
type object struct {
	segments                 bitArray
	expectedNumberOfSegments int
	hasLastSegment           bool
	// if skip is true then segments from this object shouldn't be treated as
	// zombie segments and printed out, e.g. when one of the segments is
	// outside the specified date range
	skip bool
}

// bucketsObjects keeps a list of objects associated with their path per
// bucket name.
type bucketsObjects map[string]map[storj.Path]*object

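// newObserver creates an observer that collects zombie segment candidates and
// writes the CSV header row to w. The optional from and to times limit the
// analysis: objects with a segment created outside that range are skipped.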
func newObserver(db metainfo.PointerDB, w *csv.Writer, from, to *time.Time) (*observer, error) {
	headers := []string{
		"ProjectID",
		"SegmentIndex",
		"Bucket",
		"EncodedEncryptedPath",
		"CreationDate",
		"Size",
	}
	err := w.Write(headers)
	if err != nil {
		return nil, err
	}

	return &observer{
		db:           db,
		writer:       w,
		from:         from,
		to:           to,
		zombieBuffer: make([]int, 0),
		objects:      make(bucketsObjects),
	}, nil
}

// observer is a metainfo.Loop observer for the zombie segment reaper.
type observer struct {
	db     metainfo.PointerDB
	writer *csv.Writer
	from   *time.Time
	to     *time.Time

	lastProjectID string
	zombieBuffer  []int

	objects            bucketsObjects
	inlineSegments     int
	lastInlineSegments int
	remoteSegments     int
	zombieSegments     int
}

// RemoteSegment processes a segment to collect data needed to detect zombie segments.
func (obsvr *observer) RemoteSegment(ctx context.Context, path metainfo.ScopedPath, pointer *pb.Pointer) (err error) {
	return obsvr.processSegment(ctx, path, pointer)
}

// InlineSegment processes a segment to collect data needed to detect zombie segments.
func (obsvr *observer) InlineSegment(ctx context.Context, path metainfo.ScopedPath, pointer *pb.Pointer) (err error) {
	return obsvr.processSegment(ctx, path, pointer)
}

// Object is not used in this implementation.
func (obsvr *observer) Object(ctx context.Context, path metainfo.ScopedPath, pointer *pb.Pointer) (err error) {
	return nil
}

// processSegment aggregates, in the observer's internal state, the objects
// that belong to the same project, tracking their segment indexes and summary
// information about them, so that analyzeProject can be called before the
// list of object segments of a new project starts and the internal state is
// reset.
//
// It also collects some stats about all the segments independently of the
// object they belong to.
//
// NOTE it's expected that this method is called continually for the objects
// that belong to the same project before calling it with objects of another
// project.
func (obsvr *observer) processSegment(ctx context.Context, path metainfo.ScopedPath, pointer *pb.Pointer) error {
	if obsvr.lastProjectID != "" && obsvr.lastProjectID != path.ProjectIDString {
		err := obsvr.analyzeProject(ctx)
		if err != nil {
			return err
		}

		// clean up the map to free memory
		obsvr.clearBucketsObjects()
	}

	obsvr.lastProjectID = path.ProjectIDString
	isLastSegment := path.Segment == "l"

	// collect the number of pointers for reporting
	if pointer.Type == pb.Pointer_INLINE {
		obsvr.inlineSegments++
		if isLastSegment {
			obsvr.lastInlineSegments++
		}
	} else {
		obsvr.remoteSegments++
	}

	object := findOrCreate(path.BucketName, path.EncryptedObjectPath, obsvr.objects)
	if obsvr.from != nil && pointer.CreationDate.Before(*obsvr.from) {
		object.skip = true
		// release the memory consumed by the segments because they won't be
		// used for skipped objects
		object.segments = nil
		return nil
	} else if obsvr.to != nil && pointer.CreationDate.After(*obsvr.to) {
		object.skip = true
		// release the memory consumed by the segments because they won't be
		// used for skipped objects
		object.segments = nil
		return nil
	}

	if isLastSegment {
		object.hasLastSegment = true

		streamMeta := pb.StreamMeta{}
		err := pb.Unmarshal(pointer.Metadata, &streamMeta)
		if err != nil {
			return errs.New("unexpected error unmarshalling pointer metadata %s", err)
		}

		if streamMeta.NumberOfSegments > 0 {
			object.expectedNumberOfSegments = int(streamMeta.NumberOfSegments)
		}
	} else {
		segmentIndex, err := strconv.Atoi(path.Segment[1:])
		if err != nil {
			return err
		}
		ok, err := object.segments.Has(segmentIndex)
		if err != nil {
			return err
		}
		if ok {
			// TODO make path displayable
			return errs.New("fatal error this segment is duplicated: %s", path.Raw)
		}

		err = object.segments.Set(segmentIndex)
		if err != nil {
			return err
		}
	}

	return nil
}

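// detectZombieSegments iterates over the pointer database, passing every
// segment to the observer, and then analyzes the last observed project.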
func (obsvr *observer) detectZombieSegments(ctx context.Context) error {
	err := metainfo.IterateDatabase(ctx, rateLimit, obsvr.db, obsvr)
	if err != nil {
		return err
	}

	return obsvr.analyzeProject(ctx)
}

// analyzeProject analyzes the objects in the obsvr.objects field to detect
// zombie segments and writes them to obsvr.writer.
func (obsvr *observer) analyzeProject(ctx context.Context) error {
	for bucket, objects := range obsvr.objects {
		for path, object := range objects {
			if object.skip {
				continue
			}

			err := obsvr.findZombieSegments(object)
			if err != nil {
				return err
			}

			for _, segmentIndex := range obsvr.zombieBuffer {
				err = obsvr.printSegment(ctx, segmentIndex, bucket, path)
				if err != nil {
					return err
				}
			}
		}
	}
	return nil
}

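// findZombieSegments resets the zombie buffer and fills it with the indexes
// of the segments of object that are considered zombies, based on the
// presence of the last segment, the expected number of segments, and gaps in
// the segment sequence.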
func (obsvr *observer) findZombieSegments(object *object) error {
	obsvr.resetZombieBuffer()

	if !object.hasLastSegment {
		obsvr.appendAllObjectSegments(object)
		return nil
	}

	segmentsCount := object.segments.Count()

	switch {
	// this case is only for old-style pointers with an encrypted number of
	// segments; value 0 means that we don't know how many segments the object
	// should have
	case object.expectedNumberOfSegments == 0:
		sequenceLength := firstSequenceLength(object.segments)

		for index := sequenceLength; index < object.segments.Length(); index++ {
			has, err := object.segments.Has(index)
			if err != nil {
				panic(err)
			}
			if has {
				obsvr.appendSegment(index)
			}
		}
	// using 'expectedNumberOfSegments-1' because 'segments' doesn't contain
	// the last segment
	case segmentsCount > object.expectedNumberOfSegments-1:
		sequenceLength := firstSequenceLength(object.segments)

		if sequenceLength == object.expectedNumberOfSegments-1 {
			for index := sequenceLength; index < object.segments.Length(); index++ {
				has, err := object.segments.Has(index)
				if err != nil {
					panic(err)
				}
				if has {
					obsvr.appendSegment(index)
				}
			}
		} else {
			obsvr.appendAllObjectSegments(object)
			obsvr.appendSegment(lastSegment)
		}
	case segmentsCount < object.expectedNumberOfSegments-1,
		segmentsCount == object.expectedNumberOfSegments-1 && !object.segments.IsSequence():
		obsvr.appendAllObjectSegments(object)
		obsvr.appendSegment(lastSegment)
	}

	return nil
}

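// printSegment writes a single zombie segment record to the CSV writer,
// looking up its pointer to report the creation date and size, and increments
// the zombie segments counter.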
func (obsvr *observer) printSegment(ctx context.Context, segmentIndex int, bucket, path string) error {
	var segmentIndexStr string
	if segmentIndex == lastSegment {
		segmentIndexStr = "l"
	} else {
		segmentIndexStr = "s" + strconv.Itoa(segmentIndex)
	}
	creationDate, size, err := pointerCreationDateAndSize(ctx, obsvr.db, obsvr.lastProjectID, segmentIndexStr, bucket, path)
	if err != nil {
		return err
	}
	encodedPath := base64.StdEncoding.EncodeToString([]byte(path))
	err = obsvr.writer.Write([]string{
		obsvr.lastProjectID,
		segmentIndexStr,
		bucket,
		encodedPath,
		creationDate,
		strconv.FormatInt(size, 10),
	})
	if err != nil {
		return err
	}
	obsvr.zombieSegments++
	return nil
}

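// pointerCreationDateAndSize fetches the pointer stored under the given
// project, segment index, bucket, and path, and returns its creation date
// (formatted as RFC 3339 with nanoseconds) and its segment size.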
func pointerCreationDateAndSize(
	ctx context.Context, db metainfo.PointerDB, projectID, segmentIndex, bucket, path string,
) (creationDate string, size int64, _ error) {
	key := []byte(storj.JoinPaths(projectID, segmentIndex, bucket, path))
	pointerBytes, err := db.Get(ctx, key)
	if err != nil {
		return "", 0, err
	}

	pointer := &pb.Pointer{}
	err = pb.Unmarshal(pointerBytes, pointer)
	if err != nil {
		return "", 0, err
	}

	return pointer.CreationDate.Format(time.RFC3339Nano), pointer.SegmentSize, nil
}

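// resetZombieBuffer empties the zombie buffer while keeping its allocated
// capacity for reuse.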
func (obsvr *observer) resetZombieBuffer() {
	obsvr.zombieBuffer = obsvr.zombieBuffer[:0]
}

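// appendSegment adds a segment index to the zombie buffer.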
func (obsvr *observer) appendSegment(segmentIndex int) {
	obsvr.zombieBuffer = append(obsvr.zombieBuffer, segmentIndex)
}

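// appendAllObjectSegments adds every segment index present in the object's
// bit array to the zombie buffer.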
func (obsvr *observer) appendAllObjectSegments(object *object) {
	for index := 0; index < object.segments.Length(); index++ {
		has, err := object.segments.Has(index)
		if err != nil {
			panic(err)
		}
		if has {
			obsvr.appendSegment(index)
		}
	}
}

// clearBucketsObjects clears up the buckets objects map for reusing it.
func (obsvr *observer) clearBucketsObjects() {
	// This is an idiomatic way of not having to destroy and recreate a new
	// map each time that an empty map is required.
	// See https://github.com/golang/go/issues/20138
	for b := range obsvr.objects {
		delete(obsvr.objects, b)
	}
}

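// findOrCreate returns the object stored under the given bucket and path,
// creating the bucket map and the object entry when they don't exist yet.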
func findOrCreate(bucketName string, path string, buckets bucketsObjects) *object {
	objects, ok := buckets[bucketName]
	if !ok {
		objects = make(map[storj.Path]*object)
		buckets[bucketName] = objects
	}

	obj, ok := objects[path]
	if !ok {
		obj = &object{segments: bitArray{}}
		objects[path] = obj
	}

	return obj
}

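// firstSequenceLength returns the length of the initial run of consecutive
// segments in the bit array, that is, the index of the first missing segment,
// or the array length when no segment is missing.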
func firstSequenceLength(segments bitArray) int {
	for index := 0; index < segments.Length(); index++ {
		has, err := segments.Has(index)
		if err != nil {
			panic(err)
		}
		if !has {
			return index
		}
	}
	return segments.Length()
}