satellite/repair: add logging for irreparable segments in checker

If the checker sees an irreparable segment, log its stream ID, position,
total piece count, minimum required pieces, and the IDs of the unhealthy
nodes so we can see what the problem is.

Change-Id: I76eda5270214205f4fefc646d6c391cc13ddcafd
This commit is contained in:
Cameron Ayer 2021-08-17 11:18:42 -04:00
parent 6d876acfbf
commit a7cda642a5
2 changed files with 12 additions and 5 deletions

View File

@ -6,6 +6,7 @@ package checker
import (
"bytes"
"context"
"strings"
"time"
"github.com/spacemonkeygo/monkit/v3"
@ -320,6 +321,12 @@ func (obs *checkerObserver) RemoteSegment(ctx context.Context, segment *segmentl
mon.Counter("checker_segments_below_min_req").Inc(1) //mon:locked
stats.segmentsBelowMinReq.Inc(1)
var unhealthyNodes []string
for _, p := range missingPieces {
unhealthyNodes = append(unhealthyNodes, p.StorageNode.String())
}
obs.log.Warn("checker found irreparable segment", zap.String("Segment StreamID", segment.StreamID.String()), zap.Int("Segment Position",
int(segment.Position.Encode())), zap.Int("total pieces", len(pieces)), zap.Int("min required", required), zap.String("unhealthy node IDs", strings.Join(unhealthyNodes, ",")))
}
} else {
if numHealthy > repairThreshold && numHealthy <= (repairThreshold+len(obs.monStats.remoteSegmentsOverThreshold)) {

View File

@ -63,17 +63,17 @@ func (cache *ReliabilityCache) NumNodes(ctx context.Context) (numNodes int, err
}
// MissingPieces returns the pieces whose storage nodes are unreliable with the given staleness period.
func (cache *ReliabilityCache) MissingPieces(ctx context.Context, created time.Time, pieces metabase.Pieces) (_ []int32, err error) {
func (cache *ReliabilityCache) MissingPieces(ctx context.Context, created time.Time, pieces metabase.Pieces) (_ []metabase.Piece, err error) {
defer mon.Task()(&ctx)(&err)
state, err := cache.loadFast(ctx, created)
if err != nil {
return nil, err
}
var unreliable []int32
for _, piece := range pieces {
if _, ok := state.reliable[piece.StorageNode]; !ok {
unreliable = append(unreliable, int32(piece.Number))
var unreliable []metabase.Piece
for _, p := range pieces {
if _, ok := state.reliable[p.StorageNode]; !ok {
unreliable = append(unreliable, p)
}
}
return unreliable, nil