satellite/repair: add monitoring for remote_segments_healthy_percentage
Change-Id: I6ad29fe1a947ac19d15e40ea33164a510eb33d4f
This commit is contained in:
parent
2f991b6c56
commit
ba5991dc86
@ -60,6 +60,8 @@ storj.io/storj/satellite/repair/checker."checker_segment_total_count" IntVal
|
||||
storj.io/storj/satellite/repair/checker."remote_files_checked" IntVal
|
||||
storj.io/storj/satellite/repair/checker."remote_files_lost" IntVal
|
||||
storj.io/storj/satellite/repair/checker."remote_segments_checked" IntVal
|
||||
storj.io/storj/satellite/repair/checker."remote_segments_failed_to_check" IntVal
|
||||
storj.io/storj/satellite/repair/checker."remote_segments_healthy_percentage" FloatVal
|
||||
storj.io/storj/satellite/repair/checker."remote_segments_lost" IntVal
|
||||
storj.io/storj/satellite/repair/checker."remote_segments_needing_repair" IntVal
|
||||
storj.io/storj/satellite/repair/checker."remote_segments_over_threshold_1" IntVal
|
||||
|
@ -43,6 +43,7 @@ type durabilityStats struct {
|
||||
remoteSegmentsChecked int64
|
||||
remoteSegmentsNeedingRepair int64
|
||||
remoteSegmentsLost int64
|
||||
remoteSegmentsFailedToCheck int64
|
||||
remoteSegmentInfo []string
|
||||
// remoteSegmentsOverThreshold[i] is the number of segments whose healthy count equals rt+i+1 (index 0 is rt+1, index 1 is rt+2, etc.)
|
||||
remoteSegmentsOverThreshold [5]int64
|
||||
@ -130,6 +131,7 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
|
||||
|
||||
mon.IntVal("remote_files_checked").Observe(observer.monStats.objectsChecked) //locked
|
||||
mon.IntVal("remote_segments_checked").Observe(observer.monStats.remoteSegmentsChecked) //locked
|
||||
mon.IntVal("remote_segments_failed_to_check").Observe(observer.monStats.remoteSegmentsFailedToCheck) //locked
|
||||
mon.IntVal("remote_segments_needing_repair").Observe(observer.monStats.remoteSegmentsNeedingRepair) //locked
|
||||
mon.IntVal("remote_segments_lost").Observe(observer.monStats.remoteSegmentsLost) //locked
|
||||
mon.IntVal("remote_files_lost").Observe(int64(len(observer.monStats.remoteSegmentInfo))) //locked
|
||||
@ -139,6 +141,11 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
|
||||
mon.IntVal("remote_segments_over_threshold_4").Observe(observer.monStats.remoteSegmentsOverThreshold[3]) //locked
|
||||
mon.IntVal("remote_segments_over_threshold_5").Observe(observer.monStats.remoteSegmentsOverThreshold[4]) //locked
|
||||
|
||||
allUnhealthy := observer.monStats.remoteSegmentsNeedingRepair + observer.monStats.remoteSegmentsFailedToCheck
|
||||
allChecked := observer.monStats.remoteSegmentsChecked
|
||||
allHealthy := allChecked - allUnhealthy
|
||||
mon.FloatVal("remote_segments_healthy_percentage").Observe(100 * float64(allHealthy) / float64(allChecked)) //locked
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -245,6 +252,7 @@ func (obs *checkerObserver) RemoteSegment(ctx context.Context, path metainfo.Sco
|
||||
|
||||
missingPieces, err := obs.nodestate.MissingPieces(ctx, pointer.CreationDate, pieces)
|
||||
if err != nil {
|
||||
obs.monStats.remoteSegmentsFailedToCheck++
|
||||
return errs.Combine(Error.New("error getting missing pieces"), err)
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user