satellite/repair: add logging for irreparable segments in checker
If the checker sees an irreparable segment, log some information about it so we can see what the problem is.

Change-Id: I76eda5270214205f4fefc646d6c391cc13ddcafd
This commit is contained in:
parent
6d876acfbf
commit
a7cda642a5
@ -6,6 +6,7 @@ package checker
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/spacemonkeygo/monkit/v3"
|
||||
@ -320,6 +321,12 @@ func (obs *checkerObserver) RemoteSegment(ctx context.Context, segment *segmentl
|
||||
|
||||
mon.Counter("checker_segments_below_min_req").Inc(1) //mon:locked
|
||||
stats.segmentsBelowMinReq.Inc(1)
|
||||
var unhealthyNodes []string
|
||||
for _, p := range missingPieces {
|
||||
unhealthyNodes = append(unhealthyNodes, p.StorageNode.String())
|
||||
}
|
||||
obs.log.Warn("checker found irreparable segment", zap.String("Segment StreamID", segment.StreamID.String()), zap.Int("Segment Position",
|
||||
int(segment.Position.Encode())), zap.Int("total pieces", len(pieces)), zap.Int("min required", required), zap.String("unhealthy node IDs", strings.Join(unhealthyNodes, ",")))
|
||||
}
|
||||
} else {
|
||||
if numHealthy > repairThreshold && numHealthy <= (repairThreshold+len(obs.monStats.remoteSegmentsOverThreshold)) {
|
||||
|
@ -63,17 +63,17 @@ func (cache *ReliabilityCache) NumNodes(ctx context.Context) (numNodes int, err
|
||||
}
|
||||
|
||||
// MissingPieces returns the pieces whose storage nodes are unreliable with the given staleness period.
|
||||
func (cache *ReliabilityCache) MissingPieces(ctx context.Context, created time.Time, pieces metabase.Pieces) (_ []int32, err error) {
|
||||
func (cache *ReliabilityCache) MissingPieces(ctx context.Context, created time.Time, pieces metabase.Pieces) (_ []metabase.Piece, err error) {
|
||||
defer mon.Task()(&ctx)(&err)
|
||||
|
||||
state, err := cache.loadFast(ctx, created)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var unreliable []int32
|
||||
for _, piece := range pieces {
|
||||
if _, ok := state.reliable[piece.StorageNode]; !ok {
|
||||
unreliable = append(unreliable, int32(piece.Number))
|
||||
var unreliable []metabase.Piece
|
||||
for _, p := range pieces {
|
||||
if _, ok := state.reliable[p.StorageNode]; !ok {
|
||||
unreliable = append(unreliable, p)
|
||||
}
|
||||
}
|
||||
return unreliable, nil
|
||||
|
Loading…
Reference in New Issue
Block a user