satellite/repair/checker: add monkit metrics for segments immediately above repair threshold
Record counts for segments at health=rt+1 through health=rt+5 (where rt is the repair threshold) for every checker iteration.
Change-Id: I2a00c0bc34d17beb21cacdeab4dac77f755faefe
This commit is contained in:
parent
46228fee92
commit
d5540c89a1
@ -61,6 +61,11 @@ storj.io/storj/satellite/repair/checker."remote_files_lost" IntVal
|
|||||||
storj.io/storj/satellite/repair/checker."remote_segments_checked" IntVal
|
storj.io/storj/satellite/repair/checker."remote_segments_checked" IntVal
|
||||||
storj.io/storj/satellite/repair/checker."remote_segments_lost" IntVal
|
storj.io/storj/satellite/repair/checker."remote_segments_lost" IntVal
|
||||||
storj.io/storj/satellite/repair/checker."remote_segments_needing_repair" IntVal
|
storj.io/storj/satellite/repair/checker."remote_segments_needing_repair" IntVal
|
||||||
|
storj.io/storj/satellite/repair/checker."remote_segments_over_threshold_1" IntVal
|
||||||
|
storj.io/storj/satellite/repair/checker."remote_segments_over_threshold_2" IntVal
|
||||||
|
storj.io/storj/satellite/repair/checker."remote_segments_over_threshold_3" IntVal
|
||||||
|
storj.io/storj/satellite/repair/checker."remote_segments_over_threshold_4" IntVal
|
||||||
|
storj.io/storj/satellite/repair/checker."remote_segments_over_threshold_5" IntVal
|
||||||
storj.io/storj/satellite/repair/repairer."download_failed_not_enough_pieces_repair" Meter
|
storj.io/storj/satellite/repair/repairer."download_failed_not_enough_pieces_repair" Meter
|
||||||
storj.io/storj/satellite/repair/repairer."healthy_ratio_after_repair" FloatVal
|
storj.io/storj/satellite/repair/repairer."healthy_ratio_after_repair" FloatVal
|
||||||
storj.io/storj/satellite/repair/repairer."healthy_ratio_before_repair" FloatVal
|
storj.io/storj/satellite/repair/repairer."healthy_ratio_before_repair" FloatVal
|
||||||
|
@ -44,6 +44,8 @@ type durabilityStats struct {
|
|||||||
remoteSegmentsNeedingRepair int64
|
remoteSegmentsNeedingRepair int64
|
||||||
remoteSegmentsLost int64
|
remoteSegmentsLost int64
|
||||||
remoteSegmentInfo []string
|
remoteSegmentInfo []string
|
||||||
|
// remoteSegmentsOverThreshold[i] counts segments whose number of healthy pieces equals the repair threshold (rt) + i + 1: index 0 is rt+1, index 1 is rt+2, and so on.
|
||||||
|
remoteSegmentsOverThreshold [5]int64
|
||||||
}
|
}
|
||||||
|
|
||||||
// Checker contains the information needed to do checks for missing pieces
|
// Checker contains the information needed to do checks for missing pieces
|
||||||
@ -131,6 +133,11 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
|
|||||||
mon.IntVal("remote_segments_needing_repair").Observe(observer.monStats.remoteSegmentsNeedingRepair) //locked
|
mon.IntVal("remote_segments_needing_repair").Observe(observer.monStats.remoteSegmentsNeedingRepair) //locked
|
||||||
mon.IntVal("remote_segments_lost").Observe(observer.monStats.remoteSegmentsLost) //locked
|
mon.IntVal("remote_segments_lost").Observe(observer.monStats.remoteSegmentsLost) //locked
|
||||||
mon.IntVal("remote_files_lost").Observe(int64(len(observer.monStats.remoteSegmentInfo))) //locked
|
mon.IntVal("remote_files_lost").Observe(int64(len(observer.monStats.remoteSegmentInfo))) //locked
|
||||||
|
mon.IntVal("remote_segments_over_threshold_1").Observe(observer.monStats.remoteSegmentsOverThreshold[0]) //locked
|
||||||
|
mon.IntVal("remote_segments_over_threshold_2").Observe(observer.monStats.remoteSegmentsOverThreshold[1]) //locked
|
||||||
|
mon.IntVal("remote_segments_over_threshold_3").Observe(observer.monStats.remoteSegmentsOverThreshold[2]) //locked
|
||||||
|
mon.IntVal("remote_segments_over_threshold_4").Observe(observer.monStats.remoteSegmentsOverThreshold[3]) //locked
|
||||||
|
mon.IntVal("remote_segments_over_threshold_5").Observe(observer.monStats.remoteSegmentsOverThreshold[4]) //locked
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -312,6 +319,15 @@ func (obs *checkerObserver) RemoteSegment(ctx context.Context, path metainfo.Sco
|
|||||||
obs.log.Error("error handling irreparable segment to queue", zap.Error(err))
|
obs.log.Error("error handling irreparable segment to queue", zap.Error(err))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
} else if numHealthy > redundancy.RepairThreshold && numHealthy <= (redundancy.RepairThreshold+int32(len(obs.monStats.remoteSegmentsOverThreshold))) {
|
||||||
|
// record metrics for segments right above repair threshold
|
||||||
|
// numHealthy=repairThreshold+1 through numHealthy=repairThreshold+5
|
||||||
|
for i := range obs.monStats.remoteSegmentsOverThreshold {
|
||||||
|
if numHealthy == (redundancy.RepairThreshold + int32(i) + 1) {
|
||||||
|
obs.monStats.remoteSegmentsOverThreshold[i]++
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
Loading…
Reference in New Issue
Block a user