2019-09-06 20:20:36 +01:00
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package repairer
import (
"bytes"
"context"
2020-07-16 16:50:15 +01:00
"errors"
2021-08-06 18:58:22 +01:00
"fmt"
2019-09-06 20:20:36 +01:00
"io"
"io/ioutil"
"sort"
"sync"
"sync/atomic"
"time"
2020-03-27 19:00:57 +00:00
"github.com/calebcase/tmpfile"
2019-09-06 20:20:36 +01:00
"github.com/vivint/infectious"
"github.com/zeebo/errs"
"go.uber.org/zap"
2019-12-27 11:48:47 +00:00
"storj.io/common/errs2"
"storj.io/common/pb"
"storj.io/common/pkcrypto"
"storj.io/common/rpc"
"storj.io/common/signing"
"storj.io/common/storj"
"storj.io/common/sync2"
2021-08-03 14:21:27 +01:00
"storj.io/storj/satellite/audit"
"storj.io/storj/satellite/metabase"
2020-02-21 14:07:29 +00:00
"storj.io/uplink/private/eestream"
"storj.io/uplink/private/piecestore"
2019-09-06 20:20:36 +01:00
)
// ErrPieceHashVerifyFailed is the errs class when a piece hash downloaded from storagenode fails to match the original hash.
var ErrPieceHashVerifyFailed = errs.Class("piece hashes don't match")
// ECRepairer allows the repairer to download, verify, and upload pieces from storagenodes.
type ECRepairer struct {
	log             *zap.Logger
	dialer          rpc.Dialer
	satelliteSignee signing.Signee // used to verify that order limits returned by storagenodes were signed by the satellite
	downloadTimeout time.Duration  // per-piece deadline applied in downloadAndVerifyPiece
	inmemory        bool           // if true, buffer downloaded pieces in memory instead of temp files
}
// NewECRepairer creates a new repairer for interfacing with storagenodes.
2020-03-27 19:00:57 +00:00
func NewECRepairer ( log * zap . Logger , dialer rpc . Dialer , satelliteSignee signing . Signee , downloadTimeout time . Duration , inmemory bool ) * ECRepairer {
2019-09-06 20:20:36 +01:00
return & ECRepairer {
log : log ,
2019-09-19 05:46:39 +01:00
dialer : dialer ,
2019-09-06 20:20:36 +01:00
satelliteSignee : satelliteSignee ,
2019-10-30 20:31:08 +00:00
downloadTimeout : downloadTimeout ,
2020-03-27 19:00:57 +00:00
inmemory : inmemory ,
2019-09-06 20:20:36 +01:00
}
}
2020-05-19 16:49:13 +01:00
func ( ec * ECRepairer ) dialPiecestore ( ctx context . Context , n storj . NodeURL ) ( * piecestore . Client , error ) {
2021-05-06 14:53:55 +01:00
return piecestore . Dial ( ctx , ec . dialer , n , piecestore . DefaultConfig )
2019-09-06 20:20:36 +01:00
}
// Get downloads pieces from storagenodes using the provided order limits, and decodes those pieces into a segment.
// It attempts to download from the minimum required number based on the redundancy scheme.
// After downloading a piece, the ECRepairer will verify the hash and original order limit for that piece.
// If verification fails, another piece will be downloaded until we reach the minimum required or run out of order limits.
// If piece hash verification fails, it will return all failed node IDs.
//
// The returned audit.Pieces classifies every attempted node (successful, failed,
// offline, contained, unknown) so the caller can report audit outcomes.
func (ec *ECRepairer) Get(ctx context.Context, limits []*pb.AddressedOrderLimit, cachedIPsAndPorts map[storj.NodeID]string, privateKey storj.PiecePrivateKey, es eestream.ErasureScheme, dataSize int64) (_ io.ReadCloser, _ audit.Pieces, err error) {
	defer mon.Task()(&ctx)(&err)

	if len(limits) != es.TotalCount() {
		return nil, audit.Pieces{}, Error.New("number of limits slice (%d) does not match total count (%d) of erasure scheme", len(limits), es.TotalCount())
	}

	nonNilLimits := nonNilCount(limits)

	if nonNilLimits < es.RequiredCount() {
		return nil, audit.Pieces{}, Error.New("number of non-nil limits (%d) is less than required count (%d) of erasure scheme", nonNilCount(limits), es.RequiredCount())
	}

	pieceSize := eestream.CalcPieceSize(dataSize, es)

	// Shared download state. All of the following are guarded by cond.L,
	// except errlist which has its own mutex (mu).
	var successfulPieces, inProgress int
	unusedLimits := nonNilLimits
	pieceReaders := make(map[int]io.ReadCloser)
	var pieces audit.Pieces

	limiter := sync2.NewLimiter(es.RequiredCount())
	cond := sync.NewCond(&sync.Mutex{})

	var errlist errs.Group
	var mu sync.Mutex

	for currentLimitIndex, limit := range limits {
		if limit == nil {
			continue
		}

		// capture loop variables for the closure below
		currentLimitIndex, limit := currentLimitIndex, limit
		limiter.Go(ctx, func() {
			cond.L.Lock()
			// Defers run LIFO: unlock first, then wake one waiter so it can
			// re-evaluate the counters this goroutine may have changed.
			defer cond.Signal()
			defer cond.L.Unlock()

			for {
				if successfulPieces >= es.RequiredCount() {
					// already downloaded minimum number of pieces
					cond.Broadcast()
					return
				}
				if successfulPieces+inProgress+unusedLimits < es.RequiredCount() {
					// not enough available limits left to get required number of pieces
					cond.Broadcast()
					return
				}

				if successfulPieces+inProgress >= es.RequiredCount() {
					// enough downloads are already in flight; wait to see whether one fails
					cond.Wait()
					continue
				}

				unusedLimits--
				inProgress++
				// release the lock for the duration of the (slow) download
				cond.L.Unlock()

				// prefer the cached last-known ip:port for this node, if any
				lastIPPort := cachedIPsAndPorts[limit.GetLimit().StorageNodeId]
				address := limit.GetStorageNodeAddress().GetAddress()
				var triedLastIPPort bool
				if lastIPPort != "" && lastIPPort != address {
					address = lastIPPort
					triedLastIPPort = true
				}

				pieceReadCloser, err := ec.downloadAndVerifyPiece(ctx, limit, address, privateKey, pieceSize)
				// if piecestore dial with last ip:port failed try again with node address
				if triedLastIPPort && piecestore.Error.Has(err) {
					pieceReadCloser, err = ec.downloadAndVerifyPiece(ctx, limit, limit.GetStorageNodeAddress().GetAddress(), privateKey, pieceSize)
				}

				cond.L.Lock()
				inProgress--
				piece := metabase.Piece{
					Number:      uint16(currentLimitIndex),
					StorageNode: limit.GetLimit().StorageNodeId,
				}

				if err != nil {
					// gather nodes where the calculated piece hash doesn't match the uplink signed piece hash
					if ErrPieceHashVerifyFailed.Has(err) {
						ec.log.Info("audit failed", zap.Stringer("node ID", limit.GetLimit().StorageNodeId),
							zap.String("reason", err.Error()))
						pieces.Failed = append(pieces.Failed, piece)
						return
					}

					// classify all other errors for audit reporting
					pieceAudit := audit.PieceAuditFromErr(err)
					switch pieceAudit {
					case audit.PieceAuditFailure:
						ec.log.Debug("Failed to download pieces for repair: piece not found (audit failed)",
							zap.Stringer("Node ID", limit.GetLimit().StorageNodeId),
							zap.Error(err))
						pieces.Failed = append(pieces.Failed, piece)

					case audit.PieceAuditOffline:
						ec.log.Debug("Failed to download pieces for repair: dial timeout (offline)",
							zap.Stringer("Node ID", limit.GetLimit().StorageNodeId),
							zap.Error(err))
						pieces.Offline = append(pieces.Offline, piece)

					case audit.PieceAuditContained:
						ec.log.Info("Failed to download pieces for repair: download timeout (contained)",
							zap.Stringer("Node ID", limit.GetLimit().StorageNodeId),
							zap.Error(err))
						pieces.Contained = append(pieces.Contained, piece)

					case audit.PieceAuditUnknown:
						ec.log.Info("Failed to download pieces for repair: unknown transport error (skipped)",
							zap.Stringer("Node ID", limit.GetLimit().StorageNodeId),
							zap.Error(err))
						pieces.Unknown = append(pieces.Unknown, piece)
					}

					mu.Lock()
					errlist.Add(fmt.Errorf("node id: %s, error: %w", limit.GetLimit().StorageNodeId.String(), err))
					mu.Unlock()

					return
				}

				pieceReaders[currentLimitIndex] = pieceReadCloser
				pieces.Successful = append(pieces.Successful, piece)
				successfulPieces++

				return
			}
		})
	}

	limiter.Wait()

	if successfulPieces < es.RequiredCount() {
		mon.Meter("download_failed_not_enough_pieces_repair").Mark(1) //mon:locked
		return nil, pieces, &irreparableError{
			piecesAvailable: int32(successfulPieces),
			piecesRequired:  int32(es.RequiredCount()),
			errlist:         errlist,
		}
	}

	fec, err := infectious.NewFEC(es.RequiredCount(), es.TotalCount())
	if err != nil {
		return nil, pieces, Error.Wrap(err)
	}

	esScheme := eestream.NewUnsafeRSScheme(fec, es.ErasureShareSize())
	expectedSize := pieceSize * int64(es.RequiredCount())

	ctx, cancel := context.WithCancel(ctx)
	decodeReader := eestream.DecodeReaders2(ctx, cancel, pieceReaders, esScheme, expectedSize, 0, false)

	return decodeReader, pieces, nil
}
// downloadAndVerifyPiece downloads a piece from a storagenode,
// expects the original order limit to have the correct piece public key,
// and expects the hash of the data to match the signed hash provided by the storagenode.
//
// The piece is buffered in memory when ec.inmemory is set, otherwise it is
// spooled to a temporary file; either way the returned ReadCloser is positioned
// at the start of the piece.
func (ec *ECRepairer) downloadAndVerifyPiece(ctx context.Context, limit *pb.AddressedOrderLimit, address string, privateKey storj.PiecePrivateKey, pieceSize int64) (pieceReadCloser io.ReadCloser, err error) {
	defer mon.Task()(&ctx)(&err)

	// contact node
	downloadCtx, cancel := context.WithTimeout(ctx, ec.downloadTimeout)
	defer cancel()

	ps, err := ec.dialPiecestore(downloadCtx, storj.NodeURL{
		ID:      limit.GetLimit().StorageNodeId,
		Address: address,
	})
	if err != nil {
		return nil, err
	}
	defer func() { err = errs.Combine(err, ps.Close()) }()

	downloader, err := ps.Download(downloadCtx, limit.GetLimit(), privateKey, 0, pieceSize)
	if err != nil {
		return nil, err
	}
	defer func() { err = errs.Combine(err, downloader.Close()) }()

	// hash the piece bytes as they stream through the tee
	hashWriter := pkcrypto.NewHash()
	downloadReader := io.TeeReader(downloader, hashWriter)
	var downloadedPieceSize int64

	if ec.inmemory {
		pieceBytes, err := ioutil.ReadAll(downloadReader)
		if err != nil {
			return nil, err
		}
		downloadedPieceSize = int64(len(pieceBytes))
		pieceReadCloser = ioutil.NopCloser(bytes.NewReader(pieceBytes))
	} else {
		tempfile, err := tmpfile.New("", "satellite-repair-*")
		if err != nil {
			return nil, err
		}
		defer func() {
			// close and remove file if there is some error
			// (named return err makes this cover all later failures too)
			if err != nil {
				err = errs.Combine(err, tempfile.Close())
			}
		}()

		downloadedPieceSize, err = io.Copy(tempfile, downloadReader)
		if err != nil {
			return nil, err
		}

		// seek to beginning of file so the repair job starts at the beginning of the piece
		_, err = tempfile.Seek(0, io.SeekStart)
		if err != nil {
			return nil, err
		}
		pieceReadCloser = tempfile
	}

	mon.Meter("repair_bytes_downloaded").Mark64(downloadedPieceSize) //mon:locked

	if downloadedPieceSize != pieceSize {
		return nil, Error.New("didn't download the correct amount of data, want %d, got %d", pieceSize, downloadedPieceSize)
	}

	// get signed piece hash and original order limit
	hash, originalLimit := downloader.GetHashAndLimit()
	if hash == nil {
		return nil, Error.New("hash was not sent from storagenode")
	}
	if originalLimit == nil {
		return nil, Error.New("original order limit was not sent from storagenode")
	}

	// verify order limit from storage node is signed by the satellite
	if err := verifyOrderLimitSignature(ctx, ec.satelliteSignee, originalLimit); err != nil {
		return nil, err
	}

	// verify the hashes from storage node
	calculatedHash := hashWriter.Sum(nil)
	if err := verifyPieceHash(ctx, originalLimit, hash, calculatedHash); err != nil {
		// wrapped in ErrPieceHashVerifyFailed so Get can classify this node as audit-failed
		return nil, ErrPieceHashVerifyFailed.Wrap(err)
	}

	return pieceReadCloser, nil
}
func verifyPieceHash ( ctx context . Context , limit * pb . OrderLimit , hash * pb . PieceHash , expectedHash [ ] byte ) ( err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
if limit == nil || hash == nil || len ( expectedHash ) == 0 {
return Error . New ( "invalid arguments" )
}
if limit . PieceId != hash . PieceId {
return Error . New ( "piece id changed" )
}
if ! bytes . Equal ( hash . Hash , expectedHash ) {
2021-04-21 21:41:19 +01:00
return Error . New ( "hash from storage node, %x, does not match calculated hash, %x" , hash . Hash , expectedHash )
2019-09-06 20:20:36 +01:00
}
if err := signing . VerifyUplinkPieceHashSignature ( ctx , limit . UplinkPublicKey , hash ) ; err != nil {
return Error . New ( "invalid piece hash signature" )
}
return nil
}
// verifyOrderLimitSignature checks that the order limit returned by the
// storagenode was signed by the satellite.
func verifyOrderLimitSignature(ctx context.Context, satellite signing.Signee, limit *pb.OrderLimit) error {
	err := signing.VerifyOrderLimitSignature(ctx, satellite, limit)
	if err != nil {
		return Error.New("invalid order limit signature: %v", err)
	}
	return nil
}
// Repair takes a provided segment, encodes it with the provided redundancy strategy,
// and uploads the pieces in need of repair to new nodes provided by order limits.
2021-06-17 16:05:04 +01:00
func ( ec * ECRepairer ) Repair ( ctx context . Context , limits [ ] * pb . AddressedOrderLimit , privateKey storj . PiecePrivateKey , rs eestream . RedundancyStrategy , data io . Reader , timeout time . Duration , successfulNeeded int ) ( successfulNodes [ ] * pb . Node , successfulHashes [ ] * pb . PieceHash , err error ) {
2019-09-06 20:20:36 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
pieceCount := len ( limits )
if pieceCount != rs . TotalCount ( ) {
return nil , nil , Error . New ( "size of limits slice (%d) does not match total count (%d) of erasure scheme" , pieceCount , rs . TotalCount ( ) )
}
if ! unique ( limits ) {
return nil , nil , Error . New ( "duplicated nodes are not allowed" )
}
2021-05-06 14:53:55 +01:00
readers , err := eestream . EncodeReader2 ( ctx , ioutil . NopCloser ( data ) , rs )
2019-09-06 20:20:36 +01:00
if err != nil {
return nil , nil , err
}
// info contains data about a single piece transfer
type info struct {
i int
err error
hash * pb . PieceHash
}
// this channel is used to synchronize concurrently uploaded pieces with the overall repair
infos := make ( chan info , pieceCount )
psCtx , cancel := context . WithCancel ( ctx )
defer cancel ( )
for i , addressedLimit := range limits {
go func ( i int , addressedLimit * pb . AddressedOrderLimit ) {
2021-06-17 16:05:04 +01:00
hash , err := ec . putPiece ( psCtx , ctx , addressedLimit , privateKey , readers [ i ] )
2019-09-06 20:20:36 +01:00
infos <- info { i : i , err : err , hash : hash }
} ( i , addressedLimit )
}
2020-03-30 10:59:56 +01:00
ec . log . Debug ( "Starting a timer for repair so that the number of pieces will be closer to the success threshold" ,
2019-09-06 20:20:36 +01:00
zap . Duration ( "Timer" , timeout ) ,
zap . Int ( "Node Count" , nonNilCount ( limits ) ) ,
zap . Int ( "Optimal Threshold" , rs . OptimalThreshold ( ) ) ,
)
var successfulCount , failureCount , cancellationCount int32
timer := time . AfterFunc ( timeout , func ( ) {
2020-07-16 16:50:15 +01:00
if ! errors . Is ( ctx . Err ( ) , context . Canceled ) {
2020-03-30 10:59:56 +01:00
ec . log . Debug ( "Timer expired. Canceling the long tail..." ,
2019-09-06 20:20:36 +01:00
zap . Int32 ( "Successfully repaired" , atomic . LoadInt32 ( & successfulCount ) ) ,
)
cancel ( )
}
} )
successfulNodes = make ( [ ] * pb . Node , pieceCount )
successfulHashes = make ( [ ] * pb . PieceHash , pieceCount )
for range limits {
info := <- infos
if limits [ info . i ] == nil {
continue
}
if info . err != nil {
if ! errs2 . IsCanceled ( info . err ) {
failureCount ++
2020-04-15 20:32:22 +01:00
ec . log . Warn ( "Repair to a storage node failed" ,
zap . Stringer ( "Node ID" , limits [ info . i ] . GetLimit ( ) . StorageNodeId ) ,
zap . Error ( info . err ) ,
)
2019-09-06 20:20:36 +01:00
} else {
cancellationCount ++
2020-04-15 20:32:22 +01:00
ec . log . Debug ( "Repair to storage node cancelled" ,
zap . Stringer ( "Node ID" , limits [ info . i ] . GetLimit ( ) . StorageNodeId ) ,
zap . Error ( info . err ) ,
)
2019-09-06 20:20:36 +01:00
}
continue
}
successfulNodes [ info . i ] = & pb . Node {
Id : limits [ info . i ] . GetLimit ( ) . StorageNodeId ,
Address : limits [ info . i ] . GetStorageNodeAddress ( ) ,
}
successfulHashes [ info . i ] = info . hash
successfulCount ++
2020-05-28 21:19:44 +01:00
if successfulCount >= int32 ( successfulNeeded ) {
ec . log . Debug ( "Number of successful uploads met. Canceling the long tail..." ,
zap . Int32 ( "Successfully repaired" , atomic . LoadInt32 ( & successfulCount ) ) ,
)
cancel ( )
}
2019-09-06 20:20:36 +01:00
}
// Ensure timer is stopped
_ = timer . Stop ( )
// TODO: clean up the partially uploaded segment's pieces
defer func ( ) {
select {
case <- ctx . Done ( ) :
err = Error . New ( "repair cancelled" )
default :
}
} ( )
if successfulCount == 0 {
2019-10-16 16:28:56 +01:00
return nil , nil , Error . New ( "repair to all nodes failed" )
2019-09-06 20:20:36 +01:00
}
2020-04-15 20:32:22 +01:00
ec . log . Debug ( "Successfully repaired" ,
2019-09-06 20:20:36 +01:00
zap . Int32 ( "Success Count" , atomic . LoadInt32 ( & successfulCount ) ) ,
)
2020-10-13 13:13:41 +01:00
mon . IntVal ( "repair_segment_pieces_total" ) . Observe ( int64 ( pieceCount ) ) //mon:locked
mon . IntVal ( "repair_segment_pieces_successful" ) . Observe ( int64 ( successfulCount ) ) //mon:locked
mon . IntVal ( "repair_segment_pieces_failed" ) . Observe ( int64 ( failureCount ) ) //mon:locked
mon . IntVal ( "repair_segment_pieces_canceled" ) . Observe ( int64 ( cancellationCount ) ) //mon:locked
2019-09-06 20:20:36 +01:00
return successfulNodes , successfulHashes , nil
}
// putPiece uploads a single piece to the storagenode named in limit, reading the
// piece contents from data. ctx governs this upload; parent is the overall repair
// context and is consulted only to tell a user cancellation apart from a long-tail
// cut when the upload is canceled. A nil limit drains data and returns (nil, nil).
// data is always closed before returning.
func (ec *ECRepairer) putPiece(ctx, parent context.Context, limit *pb.AddressedOrderLimit, privateKey storj.PiecePrivateKey, data io.ReadCloser) (hash *pb.PieceHash, err error) {
	defer mon.Task()(&ctx)(&err)

	nodeName := "nil"
	if limit != nil {
		nodeName = limit.GetLimit().StorageNodeId.String()[0:8]
	}
	// second monitoring task tagged with the (truncated) node id
	defer mon.Task()(&ctx, "node: "+nodeName)(&err)
	defer func() { err = errs.Combine(err, data.Close()) }()

	if limit == nil {
		// no target node: drain the reader so the piece encoder is not blocked
		_, _ = io.Copy(ioutil.Discard, data)
		return nil, nil
	}

	storageNodeID := limit.GetLimit().StorageNodeId
	pieceID := limit.GetLimit().PieceId
	ps, err := ec.dialPiecestore(ctx, storj.NodeURL{
		ID:      storageNodeID,
		Address: limit.GetStorageNodeAddress().Address,
	})
	if err != nil {
		ec.log.Debug("Failed dialing for putting piece to node",
			zap.Stringer("Piece ID", pieceID),
			zap.Stringer("Node ID", storageNodeID),
			zap.Error(err),
		)
		return nil, err
	}
	defer func() { err = errs.Combine(err, ps.Close()) }()

	hash, err = ps.UploadReader(ctx, limit.GetLimit(), privateKey, data)
	if err != nil {
		if errors.Is(ctx.Err(), context.Canceled) {
			// Canceled context means the piece upload was interrupted by user or due
			// to slow connection. No error logging for this case.
			if errors.Is(parent.Err(), context.Canceled) {
				ec.log.Debug("Upload to node canceled by user",
					zap.Stringer("Node ID", storageNodeID))
			} else {
				ec.log.Debug("Node cut from upload due to slow connection",
					zap.Stringer("Node ID", storageNodeID))
			}

			// make sure context.Canceled is the primary error in the error chain
			// for later errors.Is/errs2.IsCanceled checking
			err = errs.Combine(context.Canceled, err)
		} else {
			nodeAddress := "nil"
			if limit.GetStorageNodeAddress() != nil {
				nodeAddress = limit.GetStorageNodeAddress().GetAddress()
			}

			ec.log.Debug("Failed uploading piece to node",
				zap.Stringer("Piece ID", pieceID),
				zap.Stringer("Node ID", storageNodeID),
				zap.String("Node Address", nodeAddress),
				zap.Error(err),
			)
		}
	}

	return hash, err
}
// nonNilCount reports how many of the given order limits are non-nil.
func nonNilCount(limits []*pb.AddressedOrderLimit) int {
	count := 0
	for _, l := range limits {
		if l == nil {
			continue
		}
		count++
	}
	return count
}
// unique reports whether no storage node appears more than once among the
// non-nil limits. Nil limits contribute a zero node id, which is ignored.
func unique(limits []*pb.AddressedOrderLimit) bool {
	if len(limits) < 2 {
		return true
	}

	ids := make(storj.NodeIDList, len(limits))
	for i, limit := range limits {
		if limit != nil {
			ids[i] = limit.GetLimit().StorageNodeId
		}
	}

	// sort the ids and check for identical neighbors
	sort.Sort(ids)
	for i := 1; i < len(ids); i++ {
		if ids[i] == (storj.NodeID{}) {
			// zero id comes from a nil limit; never counts as a duplicate
			continue
		}
		if ids[i] == ids[i-1] {
			return false
		}
	}
	return true
}