cmd/tools/segment-verify: better handle timeouts

When we are verifying pieces by downloading the first byte, if we
encounter a timeout, treat the node as if we failed to connect to it,
and log the error once instead of twice.

Change-Id: I70602d554183c98f1213f3ffb1bfec41100ea0e7
This commit is contained in:
paul cannon 2023-01-05 12:00:00 -06:00 committed by Storj Robot
parent 3a9ad48345
commit 23acee2df0

View File

@@ -5,6 +5,7 @@ package main
import (
"context"
"errors"
"io"
"time"
@@ -150,7 +151,7 @@ func (service *NodeVerifier) Verify(ctx context.Context, alias metabase.NodeAlia
return i, Error.Wrap(err)
}
switch outcome {
case audit.OutcomeNodeOffline:
case audit.OutcomeNodeOffline, audit.OutcomeTimedOut:
_ = client.Close()
client = nil
case audit.OutcomeFailure:
@@ -219,11 +220,14 @@ func (service *NodeVerifier) verifySegment(ctx context.Context, client *piecesto
err = errs.Combine(errClose, errRead)
if err != nil {
logger.Error("stream read failed", zap.Error(err))
if errs2.IsRPC(err, rpcstatus.NotFound) {
logger.Info("segment not found", zap.Error(err))
return audit.OutcomeFailure, nil
}
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
logger.Info("failed to get timely response when asking for piece", zap.Error(err))
return audit.OutcomeTimedOut, nil
}
logger.Error("read/close failed", zap.Error(err))
return audit.OutcomeUnknownError, nil