satellite/repair/checker: add monkit metrics for segments immediately above repair threshold
Record counts of segments whose number of healthy pieces is rt+1 through rt+5, where rt is the repair threshold, on every checker iteration.

Change-Id: I2a00c0bc34d17beb21cacdeab4dac77f755faefe
This commit is contained in:
parent
46228fee92
commit
d5540c89a1
@ -61,6 +61,11 @@ storj.io/storj/satellite/repair/checker."remote_files_lost" IntVal
|
||||
storj.io/storj/satellite/repair/checker."remote_segments_checked" IntVal
|
||||
storj.io/storj/satellite/repair/checker."remote_segments_lost" IntVal
|
||||
storj.io/storj/satellite/repair/checker."remote_segments_needing_repair" IntVal
|
||||
storj.io/storj/satellite/repair/checker."remote_segments_over_threshold_1" IntVal
|
||||
storj.io/storj/satellite/repair/checker."remote_segments_over_threshold_2" IntVal
|
||||
storj.io/storj/satellite/repair/checker."remote_segments_over_threshold_3" IntVal
|
||||
storj.io/storj/satellite/repair/checker."remote_segments_over_threshold_4" IntVal
|
||||
storj.io/storj/satellite/repair/checker."remote_segments_over_threshold_5" IntVal
|
||||
storj.io/storj/satellite/repair/repairer."download_failed_not_enough_pieces_repair" Meter
|
||||
storj.io/storj/satellite/repair/repairer."healthy_ratio_after_repair" FloatVal
|
||||
storj.io/storj/satellite/repair/repairer."healthy_ratio_before_repair" FloatVal
|
||||
|
@ -44,6 +44,8 @@ type durabilityStats struct {
|
||||
remoteSegmentsNeedingRepair int64
|
||||
remoteSegmentsLost int64
|
||||
remoteSegmentInfo []string
|
||||
// remoteSegmentsOverThreshold[0]=# of healthy=rt+1, remoteSegmentsOverThreshold[1]=# of healthy=rt+2, etc...
|
||||
remoteSegmentsOverThreshold [5]int64
|
||||
}
|
||||
|
||||
// Checker contains the information needed to do checks for missing pieces
|
||||
@ -126,11 +128,16 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
|
||||
return err
|
||||
}
|
||||
|
||||
mon.IntVal("remote_files_checked").Observe(observer.monStats.objectsChecked) //locked
|
||||
mon.IntVal("remote_segments_checked").Observe(observer.monStats.remoteSegmentsChecked) //locked
|
||||
mon.IntVal("remote_segments_needing_repair").Observe(observer.monStats.remoteSegmentsNeedingRepair) //locked
|
||||
mon.IntVal("remote_segments_lost").Observe(observer.monStats.remoteSegmentsLost) //locked
|
||||
mon.IntVal("remote_files_lost").Observe(int64(len(observer.monStats.remoteSegmentInfo))) //locked
|
||||
mon.IntVal("remote_files_checked").Observe(observer.monStats.objectsChecked) //locked
|
||||
mon.IntVal("remote_segments_checked").Observe(observer.monStats.remoteSegmentsChecked) //locked
|
||||
mon.IntVal("remote_segments_needing_repair").Observe(observer.monStats.remoteSegmentsNeedingRepair) //locked
|
||||
mon.IntVal("remote_segments_lost").Observe(observer.monStats.remoteSegmentsLost) //locked
|
||||
mon.IntVal("remote_files_lost").Observe(int64(len(observer.monStats.remoteSegmentInfo))) //locked
|
||||
mon.IntVal("remote_segments_over_threshold_1").Observe(observer.monStats.remoteSegmentsOverThreshold[0]) //locked
|
||||
mon.IntVal("remote_segments_over_threshold_2").Observe(observer.monStats.remoteSegmentsOverThreshold[1]) //locked
|
||||
mon.IntVal("remote_segments_over_threshold_3").Observe(observer.monStats.remoteSegmentsOverThreshold[2]) //locked
|
||||
mon.IntVal("remote_segments_over_threshold_4").Observe(observer.monStats.remoteSegmentsOverThreshold[3]) //locked
|
||||
mon.IntVal("remote_segments_over_threshold_5").Observe(observer.monStats.remoteSegmentsOverThreshold[4]) //locked
|
||||
|
||||
return nil
|
||||
}
|
||||
@ -312,6 +319,15 @@ func (obs *checkerObserver) RemoteSegment(ctx context.Context, path metainfo.Sco
|
||||
obs.log.Error("error handling irreparable segment to queue", zap.Error(err))
|
||||
return nil
|
||||
}
|
||||
} else if numHealthy > redundancy.RepairThreshold && numHealthy <= (redundancy.RepairThreshold+int32(len(obs.monStats.remoteSegmentsOverThreshold))) {
|
||||
// record metrics for segments right above repair threshold
|
||||
// numHealthy=repairThreshold+1 through numHealthy=repairThreshold+5
|
||||
for i := range obs.monStats.remoteSegmentsOverThreshold {
|
||||
if numHealthy == (redundancy.RepairThreshold + int32(i) + 1) {
|
||||
obs.monStats.remoteSegmentsOverThreshold[i]++
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
|
Loading…
Reference in New Issue
Block a user