satellite/repair: add monitoring for remote_segments_healthy_percentage
Change-Id: I6ad29fe1a947ac19d15e40ea33164a510eb33d4f
This commit is contained in:
parent
2f991b6c56
commit
ba5991dc86
@ -60,6 +60,8 @@ storj.io/storj/satellite/repair/checker."checker_segment_total_count" IntVal
|
|||||||
storj.io/storj/satellite/repair/checker."remote_files_checked" IntVal
|
storj.io/storj/satellite/repair/checker."remote_files_checked" IntVal
|
||||||
storj.io/storj/satellite/repair/checker."remote_files_lost" IntVal
|
storj.io/storj/satellite/repair/checker."remote_files_lost" IntVal
|
||||||
storj.io/storj/satellite/repair/checker."remote_segments_checked" IntVal
|
storj.io/storj/satellite/repair/checker."remote_segments_checked" IntVal
|
||||||
|
storj.io/storj/satellite/repair/checker."remote_segments_failed_to_check" IntVal
|
||||||
|
storj.io/storj/satellite/repair/checker."remote_segments_healthy_percentage" FloatVal
|
||||||
storj.io/storj/satellite/repair/checker."remote_segments_lost" IntVal
|
storj.io/storj/satellite/repair/checker."remote_segments_lost" IntVal
|
||||||
storj.io/storj/satellite/repair/checker."remote_segments_needing_repair" IntVal
|
storj.io/storj/satellite/repair/checker."remote_segments_needing_repair" IntVal
|
||||||
storj.io/storj/satellite/repair/checker."remote_segments_over_threshold_1" IntVal
|
storj.io/storj/satellite/repair/checker."remote_segments_over_threshold_1" IntVal
|
||||||
|
@ -43,6 +43,7 @@ type durabilityStats struct {
|
|||||||
remoteSegmentsChecked int64
|
remoteSegmentsChecked int64
|
||||||
remoteSegmentsNeedingRepair int64
|
remoteSegmentsNeedingRepair int64
|
||||||
remoteSegmentsLost int64
|
remoteSegmentsLost int64
|
||||||
|
remoteSegmentsFailedToCheck int64
|
||||||
remoteSegmentInfo []string
|
remoteSegmentInfo []string
|
||||||
// remoteSegmentsOverThreshold[0]=# of healthy=rt+1, remoteSegmentsOverThreshold[1]=# of healthy=rt+2, etc...
|
// remoteSegmentsOverThreshold[0]=# of healthy=rt+1, remoteSegmentsOverThreshold[1]=# of healthy=rt+2, etc...
|
||||||
remoteSegmentsOverThreshold [5]int64
|
remoteSegmentsOverThreshold [5]int64
|
||||||
@ -130,6 +131,7 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
|
|||||||
|
|
||||||
mon.IntVal("remote_files_checked").Observe(observer.monStats.objectsChecked) //locked
|
mon.IntVal("remote_files_checked").Observe(observer.monStats.objectsChecked) //locked
|
||||||
mon.IntVal("remote_segments_checked").Observe(observer.monStats.remoteSegmentsChecked) //locked
|
mon.IntVal("remote_segments_checked").Observe(observer.monStats.remoteSegmentsChecked) //locked
|
||||||
|
mon.IntVal("remote_segments_failed_to_check").Observe(observer.monStats.remoteSegmentsFailedToCheck) //locked
|
||||||
mon.IntVal("remote_segments_needing_repair").Observe(observer.monStats.remoteSegmentsNeedingRepair) //locked
|
mon.IntVal("remote_segments_needing_repair").Observe(observer.monStats.remoteSegmentsNeedingRepair) //locked
|
||||||
mon.IntVal("remote_segments_lost").Observe(observer.monStats.remoteSegmentsLost) //locked
|
mon.IntVal("remote_segments_lost").Observe(observer.monStats.remoteSegmentsLost) //locked
|
||||||
mon.IntVal("remote_files_lost").Observe(int64(len(observer.monStats.remoteSegmentInfo))) //locked
|
mon.IntVal("remote_files_lost").Observe(int64(len(observer.monStats.remoteSegmentInfo))) //locked
|
||||||
@ -139,6 +141,11 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
|
|||||||
mon.IntVal("remote_segments_over_threshold_4").Observe(observer.monStats.remoteSegmentsOverThreshold[3]) //locked
|
mon.IntVal("remote_segments_over_threshold_4").Observe(observer.monStats.remoteSegmentsOverThreshold[3]) //locked
|
||||||
mon.IntVal("remote_segments_over_threshold_5").Observe(observer.monStats.remoteSegmentsOverThreshold[4]) //locked
|
mon.IntVal("remote_segments_over_threshold_5").Observe(observer.monStats.remoteSegmentsOverThreshold[4]) //locked
|
||||||
|
|
||||||
|
allUnhealthy := observer.monStats.remoteSegmentsNeedingRepair + observer.monStats.remoteSegmentsFailedToCheck
|
||||||
|
allChecked := observer.monStats.remoteSegmentsChecked
|
||||||
|
allHealthy := allChecked - allUnhealthy
|
||||||
|
mon.FloatVal("remote_segments_healthy_percentage").Observe(100 * float64(allHealthy) / float64(allChecked)) //locked
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -245,6 +252,7 @@ func (obs *checkerObserver) RemoteSegment(ctx context.Context, path metainfo.Sco
|
|||||||
|
|
||||||
missingPieces, err := obs.nodestate.MissingPieces(ctx, pointer.CreationDate, pieces)
|
missingPieces, err := obs.nodestate.MissingPieces(ctx, pointer.CreationDate, pieces)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
obs.monStats.remoteSegmentsFailedToCheck++
|
||||||
return errs.Combine(Error.New("error getting missing pieces"), err)
|
return errs.Combine(Error.New("error getting missing pieces"), err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user