Send durability stats after iterating over all segments (#2028)

This commit is contained in:
Maximillian von Briesen 2019-05-22 17:17:52 -04:00 committed by Natalie Villasana
parent 4058c29ca4
commit 45a2253628

View File

@ -35,13 +35,21 @@ type Config struct {
// Checker contains the information needed to do checks for missing pieces
type Checker struct {
metainfo *metainfo.Service
lastChecked string
repairQueue queue.RepairQueue
overlay *overlay.Cache
irrdb irreparable.DB
logger *zap.Logger
Loop sync2.Cycle
metainfo *metainfo.Service
lastChecked string
repairQueue queue.RepairQueue
overlay *overlay.Cache
irrdb irreparable.DB
logger *zap.Logger
Loop sync2.Cycle
durabilityStats durabilityStats
}
type durabilityStats struct {
remoteSegmentsChecked int64
remoteSegmentsNeedingRepair int64
remoteSegmentsLost int64
remoteSegmentInfo []string
}
// NewChecker creates a new instance of checker
@ -82,11 +90,6 @@ func (checker *Checker) Close() error {
func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error) {
defer mon.Task()(&ctx)(&err)
var remoteSegmentsChecked int64
var remoteSegmentsNeedingRepair int64
var remoteSegmentsLost int64
var remoteSegmentInfo []string
err = checker.metainfo.Iterate("", checker.lastChecked, true, false,
func(it storage.Iterator) error {
var item storage.ListItem
@ -99,6 +102,18 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
// if keys are equal, start from the beginning in the next call
if nextItem.Key.String() == item.Key.String() {
checker.lastChecked = ""
// send durability stats
mon.IntVal("remote_segments_checked").Observe(checker.durabilityStats.remoteSegmentsChecked)
mon.IntVal("remote_segments_needing_repair").Observe(checker.durabilityStats.remoteSegmentsNeedingRepair)
mon.IntVal("remote_segments_lost").Observe(checker.durabilityStats.remoteSegmentsLost)
mon.IntVal("remote_files_lost").Observe(int64(len(checker.durabilityStats.remoteSegmentInfo)))
// reset durability stats
checker.durabilityStats.remoteSegmentsChecked = 0
checker.durabilityStats.remoteSegmentsNeedingRepair = 0
checker.durabilityStats.remoteSegmentsLost = 0
checker.durabilityStats.remoteSegmentInfo = []string{}
}
}()
@ -126,7 +141,7 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
return Error.New("error getting missing pieces %s", err)
}
remoteSegmentsChecked++
checker.durabilityStats.remoteSegmentsChecked++
numHealthy := int32(len(pieces) - len(missingPieces))
redundancy := pointer.Remote.Redundancy
// we repair when the number of healthy files is less than or equal to the repair threshold
@ -136,7 +151,7 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
checker.logger.Warn("Missing pieces is zero in checker, but this should be impossible -- bad redundancy scheme.")
continue
}
remoteSegmentsNeedingRepair++
checker.durabilityStats.remoteSegmentsNeedingRepair++
err = checker.repairQueue.Insert(ctx, &pb.InjuredSegment{
Path: string(item.Key),
LostPieces: missingPieces,
@ -152,15 +167,15 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
if len(pathElements) >= 4 {
project, bucketName, segmentpath := pathElements[0], pathElements[2], pathElements[3]
lostSegInfo := storj.JoinPaths(project, bucketName, segmentpath)
if contains(remoteSegmentInfo, lostSegInfo) == false {
remoteSegmentInfo = append(remoteSegmentInfo, lostSegInfo)
if contains(checker.durabilityStats.remoteSegmentInfo, lostSegInfo) == false {
checker.durabilityStats.remoteSegmentInfo = append(checker.durabilityStats.remoteSegmentInfo, lostSegInfo)
}
}
// TODO: irreparable segment should be using storj.NodeID or something, since at the point of repair
// it may have been already repaired once.
remoteSegmentsLost++
checker.durabilityStats.remoteSegmentsLost++
// make an entry in to the irreparable table
segmentInfo := &pb.IrreparableSegment{
Path: item.Key,
@ -183,10 +198,6 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
if err != nil {
return err
}
mon.IntVal("remote_segments_checked").Observe(remoteSegmentsChecked)
mon.IntVal("remote_segments_needing_repair").Observe(remoteSegmentsNeedingRepair)
mon.IntVal("remote_segments_lost").Observe(remoteSegmentsLost)
mon.IntVal("remote_files_lost").Observe(int64(len(remoteSegmentInfo)))
return nil
}