Send durability stats after iterating over all segments (#2028)
parent 4058c29ca4
commit 45a2253628
@@ -35,13 +35,21 @@ type Config struct {
 
 // Checker contains the information needed to do checks for missing pieces
 type Checker struct {
-    metainfo    *metainfo.Service
-    lastChecked string
-    repairQueue queue.RepairQueue
-    overlay     *overlay.Cache
-    irrdb       irreparable.DB
-    logger      *zap.Logger
-    Loop        sync2.Cycle
+    metainfo        *metainfo.Service
+    lastChecked     string
+    repairQueue     queue.RepairQueue
+    overlay         *overlay.Cache
+    irrdb           irreparable.DB
+    logger          *zap.Logger
+    Loop            sync2.Cycle
+    durabilityStats durabilityStats
 }
 
+type durabilityStats struct {
+    remoteSegmentsChecked       int64
+    remoteSegmentsNeedingRepair int64
+    remoteSegmentsLost          int64
+    remoteSegmentInfo           []string
+}
+
 // NewChecker creates a new instance of checker
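Both the new observation calls and the ones they replace go through a package-level monkit scope named mon, which is not visible in these hunks. A minimal sketch of that convention, under the assumption that the checker package declares it the usual way for storj code of this era; the import path and declaration below are assumptions, not part of this commit:

package checker

import (
    monkit "gopkg.in/spacemonkeygo/monkit.v2" // assumed monkit version and import path
)

// mon is the package-level metrics scope that the mon.IntVal(...).Observe(...)
// calls in the hunks below report into.
var mon = monkit.Package()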
@@ -82,11 +90,6 @@ func (checker *Checker) Close() error {
 func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error) {
     defer mon.Task()(&ctx)(&err)
 
-    var remoteSegmentsChecked int64
-    var remoteSegmentsNeedingRepair int64
-    var remoteSegmentsLost int64
-    var remoteSegmentInfo []string
-
     err = checker.metainfo.Iterate("", checker.lastChecked, true, false,
         func(it storage.Iterator) error {
             var item storage.ListItem
@@ -99,6 +102,18 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
                 // if keys are equal, start from the beginning in the next call
                 if nextItem.Key.String() == item.Key.String() {
                     checker.lastChecked = ""
+
+                    // send durability stats
+                    mon.IntVal("remote_segments_checked").Observe(checker.durabilityStats.remoteSegmentsChecked)
+                    mon.IntVal("remote_segments_needing_repair").Observe(checker.durabilityStats.remoteSegmentsNeedingRepair)
+                    mon.IntVal("remote_segments_lost").Observe(checker.durabilityStats.remoteSegmentsLost)
+                    mon.IntVal("remote_files_lost").Observe(int64(len(checker.durabilityStats.remoteSegmentInfo)))
+
+                    // reset durability stats
+                    checker.durabilityStats.remoteSegmentsChecked = 0
+                    checker.durabilityStats.remoteSegmentsNeedingRepair = 0
+                    checker.durabilityStats.remoteSegmentsLost = 0
+                    checker.durabilityStats.remoteSegmentInfo = []string{}
                 }
             }()
 
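The stats are flushed only on the branch where the iterator has wrapped back to the first key, i.e. once per complete pass over all segments, which is what the commit title describes. The same observe-and-reset logic could equally live in a small helper on Checker; a sketch of that equivalent refactor, where the flushDurabilityStats name is hypothetical and not part of this commit:

// flushDurabilityStats is a hypothetical helper equivalent to the block
// inlined above: it reports the totals gathered during one full iteration
// over the segments and then zeroes them for the next pass.
func (checker *Checker) flushDurabilityStats() {
    mon.IntVal("remote_segments_checked").Observe(checker.durabilityStats.remoteSegmentsChecked)
    mon.IntVal("remote_segments_needing_repair").Observe(checker.durabilityStats.remoteSegmentsNeedingRepair)
    mon.IntVal("remote_segments_lost").Observe(checker.durabilityStats.remoteSegmentsLost)
    mon.IntVal("remote_files_lost").Observe(int64(len(checker.durabilityStats.remoteSegmentInfo)))

    // The zero value resets every counter; a nil remoteSegmentInfo slice
    // behaves the same as the explicit []string{} used in the diff for
    // append, len, and the contains check.
    checker.durabilityStats = durabilityStats{}
}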
@@ -126,7 +141,7 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
                 return Error.New("error getting missing pieces %s", err)
             }
 
-            remoteSegmentsChecked++
+            checker.durabilityStats.remoteSegmentsChecked++
             numHealthy := int32(len(pieces) - len(missingPieces))
             redundancy := pointer.Remote.Redundancy
             // we repair when the number of healthy files is less than or equal to the repair threshold
@@ -136,7 +151,7 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
                     checker.logger.Warn("Missing pieces is zero in checker, but this should be impossible -- bad redundancy scheme.")
                     continue
                 }
-                remoteSegmentsNeedingRepair++
+                checker.durabilityStats.remoteSegmentsNeedingRepair++
                 err = checker.repairQueue.Insert(ctx, &pb.InjuredSegment{
                     Path:       string(item.Key),
                     LostPieces: missingPieces,
@@ -152,15 +167,15 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
                 if len(pathElements) >= 4 {
                     project, bucketName, segmentpath := pathElements[0], pathElements[2], pathElements[3]
                     lostSegInfo := storj.JoinPaths(project, bucketName, segmentpath)
-                    if contains(remoteSegmentInfo, lostSegInfo) == false {
-                        remoteSegmentInfo = append(remoteSegmentInfo, lostSegInfo)
+                    if contains(checker.durabilityStats.remoteSegmentInfo, lostSegInfo) == false {
+                        checker.durabilityStats.remoteSegmentInfo = append(checker.durabilityStats.remoteSegmentInfo, lostSegInfo)
                     }
                 }
 
                 // TODO: irreparable segment should be using storj.NodeID or something, since at the point of repair
                 // it may have been already repaired once.
 
-                remoteSegmentsLost++
+                checker.durabilityStats.remoteSegmentsLost++
                 // make an entry in to the irreparable table
                 segmentInfo := &pb.IrreparableSegment{
                     Path: item.Key,
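The deduplication of lost-file paths above relies on a contains helper that does not appear in these hunks; a minimal sketch, assuming it is a plain linear membership check over a string slice:

// contains reports whether the slice a includes the string x. This is a
// sketch of the helper referenced above, assuming a simple linear scan;
// the actual implementation lives elsewhere in the package and is not shown here.
func contains(a []string, x string) bool {
    for _, item := range a {
        if item == x {
            return true
        }
    }
    return false
}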
@@ -183,10 +198,6 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
     if err != nil {
         return err
     }
-    mon.IntVal("remote_segments_checked").Observe(remoteSegmentsChecked)
-    mon.IntVal("remote_segments_needing_repair").Observe(remoteSegmentsNeedingRepair)
-    mon.IntVal("remote_segments_lost").Observe(remoteSegmentsLost)
-    mon.IntVal("remote_files_lost").Observe(int64(len(remoteSegmentInfo)))
 
     return nil
 }