diff --git a/monkit.lock b/monkit.lock
index 6fe21a88e..a097d9810 100644
--- a/monkit.lock
+++ b/monkit.lock
@@ -60,6 +60,8 @@ storj.io/storj/satellite/repair/checker."checker_segment_total_count" IntVal
 storj.io/storj/satellite/repair/checker."remote_files_checked" IntVal
 storj.io/storj/satellite/repair/checker."remote_files_lost" IntVal
 storj.io/storj/satellite/repair/checker."remote_segments_checked" IntVal
+storj.io/storj/satellite/repair/checker."remote_segments_failed_to_check" IntVal
+storj.io/storj/satellite/repair/checker."remote_segments_healthy_percentage" FloatVal
 storj.io/storj/satellite/repair/checker."remote_segments_lost" IntVal
 storj.io/storj/satellite/repair/checker."remote_segments_needing_repair" IntVal
 storj.io/storj/satellite/repair/checker."remote_segments_over_threshold_1" IntVal
diff --git a/satellite/repair/checker/checker.go b/satellite/repair/checker/checker.go
index 6e7af0b11..b23004fe7 100644
--- a/satellite/repair/checker/checker.go
+++ b/satellite/repair/checker/checker.go
@@ -43,6 +43,7 @@ type durabilityStats struct {
 	remoteSegmentsChecked       int64
 	remoteSegmentsNeedingRepair int64
 	remoteSegmentsLost          int64
+	remoteSegmentsFailedToCheck int64
 	remoteSegmentInfo           []string
 	// remoteSegmentsOverThreshold[0]=# of healthy=rt+1, remoteSegmentsOverThreshold[1]=# of healthy=rt+2, etc...
 	remoteSegmentsOverThreshold [5]int64
@@ -130,6 +131,7 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
 
 	mon.IntVal("remote_files_checked").Observe(observer.monStats.objectsChecked)                             //locked
 	mon.IntVal("remote_segments_checked").Observe(observer.monStats.remoteSegmentsChecked)                   //locked
+	mon.IntVal("remote_segments_failed_to_check").Observe(observer.monStats.remoteSegmentsFailedToCheck)     //locked
 	mon.IntVal("remote_segments_needing_repair").Observe(observer.monStats.remoteSegmentsNeedingRepair)      //locked
 	mon.IntVal("remote_segments_lost").Observe(observer.monStats.remoteSegmentsLost)                         //locked
 	mon.IntVal("remote_files_lost").Observe(int64(len(observer.monStats.remoteSegmentInfo)))                 //locked
@@ -139,6 +141,16 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
 	mon.IntVal("remote_segments_over_threshold_4").Observe(observer.monStats.remoteSegmentsOverThreshold[3]) //locked
 	mon.IntVal("remote_segments_over_threshold_5").Observe(observer.monStats.remoteSegmentsOverThreshold[4]) //locked
 
+	// Healthy segments are the checked ones that neither need repair nor
+	// failed to be checked.
+	allUnhealthy := observer.monStats.remoteSegmentsNeedingRepair + observer.monStats.remoteSegmentsFailedToCheck
+	allChecked := observer.monStats.remoteSegmentsChecked
+	allHealthy := allChecked - allUnhealthy
+	// 0/0 is NaN in float64; skip the observation when nothing was checked.
+	if allChecked > 0 {
+		mon.FloatVal("remote_segments_healthy_percentage").Observe(100 * float64(allHealthy) / float64(allChecked)) //locked
+	}
+
 	return nil
 }
 
@@ -245,6 +257,7 @@ func (obs *checkerObserver) RemoteSegment(ctx context.Context, path metainfo.Sco
 
 	missingPieces, err := obs.nodestate.MissingPieces(ctx, pointer.CreationDate, pieces)
 	if err != nil {
+		obs.monStats.remoteSegmentsFailedToCheck++
 		return errs.Combine(Error.New("error getting missing pieces"), err)
 	}
 