satellite/repair: clean up logging (#3833)

Co-authored-by: Michal Niewrzal <michal@storj.io>
This commit is contained in: (branch list not captured)
Authored by littleskunk on 2020-03-30 11:59:56 +02:00, committed by GitHub
parent 831668478a
commit 048ca4558f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 12 additions and 27 deletions

View File

@ -127,8 +127,7 @@ func (ec *ECRepairer) Get(ctx context.Context, limits []*pb.AddressedOrderLimit,
NodeId: limit.GetLimit().StorageNodeId,
})
} else {
-ec.log.Debug("Failed to download pieces for repair",
-zap.Binary("Segment", []byte(path)),
+ec.log.Info("Failed to download pieces for repair",
zap.Error(err))
}
return
@ -318,8 +317,7 @@ func (ec *ECRepairer) Repair(ctx context.Context, limits []*pb.AddressedOrderLim
infos <- info{i: i, err: err, hash: hash}
}(i, addressedLimit)
}
-ec.log.Info("Starting a timer for repair so that the number of pieces will be closer to the success threshold",
-zap.Binary("Segment", []byte(path)),
+ec.log.Debug("Starting a timer for repair so that the number of pieces will be closer to the success threshold",
zap.Duration("Timer", timeout),
zap.Int("Node Count", nonNilCount(limits)),
zap.Int("Optimal Threshold", rs.OptimalThreshold()),
@ -328,8 +326,7 @@ func (ec *ECRepairer) Repair(ctx context.Context, limits []*pb.AddressedOrderLim
var successfulCount, failureCount, cancellationCount int32
timer := time.AfterFunc(timeout, func() {
if ctx.Err() != context.Canceled {
-ec.log.Info("Timer expired. Canceling the long tail...",
-zap.Binary("Segment", []byte(path)),
+ec.log.Debug("Timer expired. Canceling the long tail...",
zap.Int32("Successfully repaired", atomic.LoadInt32(&successfulCount)),
)
cancel()
@ -352,8 +349,7 @@ func (ec *ECRepairer) Repair(ctx context.Context, limits []*pb.AddressedOrderLim
} else {
cancellationCount++
}
-ec.log.Debug("Repair to storage node failed",
-zap.Binary("Segment", []byte(path)),
+ec.log.Info("Repair to storage node failed",
zap.Stringer("Node ID", limits[info.i].GetLimit().StorageNodeId),
zap.Error(info.err),
)
@ -385,7 +381,6 @@ func (ec *ECRepairer) Repair(ctx context.Context, limits []*pb.AddressedOrderLim
}
ec.log.Info("Successfully repaired",
-zap.Binary("Segment", []byte(path)),
zap.Int32("Success Count", atomic.LoadInt32(&successfulCount)),
)
@ -418,7 +413,6 @@ func (ec *ECRepairer) putPiece(ctx, parent context.Context, limit *pb.AddressedO
})
if err != nil {
ec.log.Debug("Failed dialing for putting piece to node",
-zap.Binary("Segment", []byte(path)),
zap.Stringer("Piece ID", pieceID),
zap.Stringer("Node ID", storageNodeID),
zap.Error(err),
@ -430,7 +424,6 @@ func (ec *ECRepairer) putPiece(ctx, parent context.Context, limit *pb.AddressedO
upload, err := ps.Upload(ctx, limit.GetLimit(), privateKey)
if err != nil {
ec.log.Debug("Failed requesting upload of pieces to node",
-zap.Binary("Segment", []byte(path)),
zap.Stringer("Piece ID", pieceID),
zap.Stringer("Node ID", storageNodeID),
zap.Error(err),
@ -453,12 +446,10 @@ func (ec *ECRepairer) putPiece(ctx, parent context.Context, limit *pb.AddressedO
// to slow connection. No error logging for this case.
if ctx.Err() == context.Canceled {
if parent.Err() == context.Canceled {
-ec.log.Info("Upload to node canceled by user",
-zap.Binary("Segment", []byte(path)),
+ec.log.Debug("Upload to node canceled by user",
zap.Stringer("Node ID", storageNodeID))
} else {
ec.log.Debug("Node cut from upload due to slow connection",
-zap.Binary("Segment", []byte(path)),
zap.Stringer("Node ID", storageNodeID))
}
err = context.Canceled
@ -469,7 +460,6 @@ func (ec *ECRepairer) putPiece(ctx, parent context.Context, limit *pb.AddressedO
}
ec.log.Debug("Failed uploading piece to node",
-zap.Binary("Segment", []byte(path)),
zap.Stringer("Piece ID", pieceID),
zap.Stringer("Node ID", storageNodeID),
zap.String("Node Address", nodeAddress),

View File

@ -133,7 +133,7 @@ func (service *Service) process(ctx context.Context) (err error) {
cancel()
return err
}
-service.log.Info("Retrieved segment from repair queue", zap.Binary("Segment", seg.GetPath()))
+service.log.Debug("Retrieved segment from repair queue")
// this goroutine inherits the JobLimiter semaphore acquisition and is now responsible
// for releasing it.
@ -142,7 +142,7 @@ func (service *Service) process(ctx context.Context) (err error) {
defer cancel()
if err := service.worker(ctx, seg); err != nil {
-service.log.Error("repair worker failed:", zap.Binary("Segment", seg.GetPath()), zap.Error(err))
+service.log.Error("repair worker failed:", zap.Error(err))
}
}()
@ -154,16 +154,13 @@ func (service *Service) worker(ctx context.Context, seg *pb.InjuredSegment) (err
workerStartTime := time.Now().UTC()
-service.log.Info("Limiter running repair on segment",
-zap.Binary("Segment", seg.GetPath()),
-zap.String("Segment Path", string(seg.GetPath())))
+service.log.Debug("Limiter running repair on segment")
// note that shouldDelete is used even in the case where err is not null
shouldDelete, err := service.repairer.Repair(ctx, string(seg.GetPath()))
if shouldDelete {
if irreparableErr, ok := err.(*irreparableError); ok {
service.log.Error("segment could not be repaired! adding to irreparableDB for more attention",
-zap.Error(err),
-zap.Binary("segment", seg.GetPath()))
+zap.Error(err))
segmentInfo := &pb.IrreparableSegment{
Path: seg.GetPath(),
SegmentDetail: irreparableErr.segmentInfo,
@ -177,11 +174,9 @@ func (service *Service) worker(ctx context.Context, seg *pb.InjuredSegment) (err
}
} else if err != nil {
service.log.Error("unexpected error repairing segment!",
-zap.Error(err),
-zap.Binary("segment", seg.GetPath()))
+zap.Error(err))
} else {
-service.log.Info("removing repaired segment from repair queue",
-zap.Binary("Segment", seg.GetPath()))
+service.log.Debug("removing repaired segment from repair queue")
}
if shouldDelete {
delErr := service.queue.Delete(ctx, seg)

View File

@ -105,7 +105,7 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, path storj.Path) (s
if storj.ErrObjectNotFound.Has(err) {
mon.Meter("repair_unnecessary").Mark(1) //locked
mon.Meter("segment_deleted_before_repair").Mark(1) //locked
-repairer.log.Debug("segment was deleted", zap.Binary("Segment", []byte(path)))
+repairer.log.Debug("segment was deleted")
return true, nil
}
return false, metainfoGetError.Wrap(err)