// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.

package audit

import (
	"bytes"
	"context"
	"io"
	"math/rand"
	"sync"
	"time"

	"github.com/vivint/infectious"
	"github.com/zeebo/errs"
	"go.uber.org/zap"
	"gopkg.in/spacemonkeygo/monkit.v2"

	"storj.io/common/errs2"
	"storj.io/common/identity"
	"storj.io/common/memory"
	"storj.io/common/pb"
	"storj.io/common/pkcrypto"
	"storj.io/common/rpc"
	"storj.io/common/rpc/rpcstatus"
	"storj.io/common/storj"
	"storj.io/storj/satellite/metainfo"
	"storj.io/storj/satellite/orders"
	"storj.io/storj/satellite/overlay"
	"storj.io/storj/uplink/eestream"
	"storj.io/storj/uplink/piecestore"
)

var (
	mon = monkit.Package()

	// ErrNotEnoughShares is the errs class for when not enough shares are available to do an audit
	ErrNotEnoughShares = errs.Class("not enough shares for successful audit")
	// ErrSegmentDeleted is the errs class when the audited segment was deleted during the audit
	ErrSegmentDeleted = errs.Class("segment deleted during audit")
	// ErrSegmentExpired is the errs class used when a segment to audit has already expired.
	ErrSegmentExpired = errs.Class("segment expired before audit")
)

// Share represents required information about an audited share
type Share struct {
	Error    error
	PieceNum int
	NodeID   storj.NodeID
	Data     []byte
}

// Verifier helps verify the correctness of a given stripe
//
// architecture: Worker
type Verifier struct {
	log                *zap.Logger
	metainfo           *metainfo.Service
	orders             *orders.Service
	auditor            *identity.PeerIdentity
	dialer             rpc.Dialer
	overlay            *overlay.Service
	containment        Containment
	minBytesPerSecond  memory.Size
	minDownloadTimeout time.Duration

	OnTestingCheckSegmentAlteredHook func()
}

// NewVerifier creates a Verifier
func NewVerifier(log *zap.Logger, metainfo *metainfo.Service, dialer rpc.Dialer, overlay *overlay.Service, containment Containment, orders *orders.Service, id *identity.FullIdentity, minBytesPerSecond memory.Size, minDownloadTimeout time.Duration) *Verifier {
	return &Verifier{
		log:                log,
		metainfo:           metainfo,
		orders:             orders,
		auditor:            id.PeerIdentity(),
		dialer:             dialer,
		overlay:            overlay,
		containment:        containment,
		minBytesPerSecond:  minBytesPerSecond,
		minDownloadTimeout: minDownloadTimeout,
	}
}

// Verify downloads shares then verifies the data correctness at a random stripe.
func (verifier *Verifier) Verify(ctx context.Context, path storj.Path, skip map[storj.NodeID]bool) (report Report, err error) {
	defer mon.Task()(&ctx)(&err)

	pointerBytes, pointer, err := verifier.metainfo.GetWithBytes(ctx, path)
	if err != nil {
		if storj.ErrObjectNotFound.Has(err) {
			return Report{}, ErrSegmentDeleted.New("%q", path)
		}
		return Report{}, err
	}

	if pointer.ExpirationDate != (time.Time{}) && pointer.ExpirationDate.Before(time.Now().UTC()) {
		errDelete := verifier.metainfo.Delete(ctx, path, pointerBytes)
		if errDelete != nil {
			return Report{}, Error.Wrap(errDelete)
		}
		return Report{}, ErrSegmentExpired.New("segment expired before Verify")
	}

	defer func() {
		// if piece hashes have not been verified for this segment, do not mark nodes as failing audit
		if !pointer.PieceHashesVerified {
			report.PendingAudits = nil
			report.Fails = nil
		}
	}()

	randomIndex, err := GetRandomStripe(ctx, pointer)
	if err != nil {
		return Report{}, err
	}

	shareSize := pointer.GetRemote().GetRedundancy().GetErasureShareSize()
	bucketID := createBucketID(path)

	var offlineNodes storj.NodeIDList
	var failedNodes storj.NodeIDList
	var unknownNodes storj.NodeIDList
	containedNodes := make(map[int]storj.NodeID)
	sharesToAudit := make(map[int]Share)

	orderLimits, privateKey, err := verifier.orders.CreateAuditOrderLimits(ctx, bucketID, pointer, skip)
	if err != nil {
		return Report{}, err
	}

	// NOTE offlineNodes will include disqualified nodes because they aren't in
	// the skip list
	offlineNodes = getOfflineNodes(pointer, orderLimits, skip)
	if len(offlineNodes) > 0 {
		verifier.log.Debug("Verify: order limits not created for some nodes (offline/disqualified)",
			zap.Bool("Piece Hash Verified", pointer.PieceHashesVerified),
			zap.Binary("Segment", []byte(path)),
			zap.Strings("Node IDs", offlineNodes.Strings()))
	}

	shares, err := verifier.DownloadShares(ctx, orderLimits, privateKey, randomIndex, shareSize)
	if err != nil {
		return Report{
			Offlines: offlineNodes,
		}, err
	}
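
	// the pointer may have been deleted or replaced while the shares were downloading;
	// if so, abort instead of penalizing nodes for a segment that no longer matches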
	_, err = verifier.checkIfSegmentAltered(ctx, path, pointer)
	if err != nil {
		return Report{
			Offlines: offlineNodes,
		}, err
	}
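
	// classify each downloaded share: successes are kept for the erasure check below,
	// dial failures count as offline, download timeouts become contained (pending audits),
	// missing pieces are audit failures, and anything else is recorded as unknown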
	for pieceNum, share := range shares {
		if share.Error == nil {
			// no error -- share downloaded successfully
			sharesToAudit[pieceNum] = share
			continue
		}
		if rpc.Error.Has(share.Error) {
			if errs.Is(share.Error, context.DeadlineExceeded) {
				// dial timeout
				offlineNodes = append(offlineNodes, share.NodeID)
				verifier.log.Debug("Verify: dial timeout (offline)",
					zap.Bool("Piece Hash Verified", pointer.PieceHashesVerified),
					zap.Binary("Segment", []byte(path)),
					zap.Stringer("Node ID", share.NodeID),
					zap.Error(share.Error))
				continue
			}
			if errs2.IsRPC(share.Error, rpcstatus.Unknown) {
				// dial failed -- offline node
				offlineNodes = append(offlineNodes, share.NodeID)
				verifier.log.Debug("Verify: dial failed (offline)",
					zap.Bool("Piece Hash Verified", pointer.PieceHashesVerified),
					zap.Binary("Segment", []byte(path)),
					zap.Stringer("Node ID", share.NodeID),
					zap.Error(share.Error))
				continue
			}
			// unknown transport error
			unknownNodes = append(unknownNodes, share.NodeID)
			verifier.log.Debug("Verify: unknown transport error (skipped)",
				zap.Bool("Piece Hash Verified", pointer.PieceHashesVerified),
				zap.Binary("Segment", []byte(path)),
				zap.Stringer("Node ID", share.NodeID),
				zap.Error(share.Error))
			continue
		}

		if errs2.IsRPC(share.Error, rpcstatus.NotFound) {
			// missing share
			failedNodes = append(failedNodes, share.NodeID)
			verifier.log.Debug("Verify: piece not found (audit failed)",
				zap.Bool("Piece Hash Verified", pointer.PieceHashesVerified),
				zap.Binary("Segment", []byte(path)),
				zap.Stringer("Node ID", share.NodeID),
				zap.Error(share.Error))
			continue
		}

		if errs2.IsRPC(share.Error, rpcstatus.DeadlineExceeded) {
			// dial successful, but download timed out
			containedNodes[pieceNum] = share.NodeID
			verifier.log.Debug("Verify: download timeout (contained)",
				zap.Bool("Piece Hash Verified", pointer.PieceHashesVerified),
				zap.Binary("Segment", []byte(path)),
				zap.Stringer("Node ID", share.NodeID),
				zap.Error(share.Error))
			continue
		}

		// unknown error
		unknownNodes = append(unknownNodes, share.NodeID)
		verifier.log.Debug("Verify: unknown error (skipped)",
			zap.Bool("Piece Hash Verified", pointer.PieceHashesVerified),
			zap.Binary("Segment", []byte(path)),
			zap.Stringer("Node ID", share.NodeID),
			zap.Error(share.Error))
	}

	required := int(pointer.Remote.Redundancy.GetMinReq())
	total := int(pointer.Remote.Redundancy.GetTotal())

	if len(sharesToAudit) < required {
		return Report{
			Fails:    failedNodes,
			Offlines: offlineNodes,
			Unknown:  unknownNodes,
		}, ErrNotEnoughShares.New("got %d, required %d", len(sharesToAudit), required)
	}

	pieceNums, correctedShares, err := auditShares(ctx, required, total, sharesToAudit)
	if err != nil {
		return Report{
			Fails:    failedNodes,
			Offlines: offlineNodes,
			Unknown:  unknownNodes,
		}, err
	}

	for _, pieceNum := range pieceNums {
		failedNodes = append(failedNodes, shares[pieceNum].NodeID)
	}

	successNodes := getSuccessNodes(ctx, shares, failedNodes, offlineNodes, unknownNodes, containedNodes)

	totalInPointer := len(pointer.GetRemote().GetRemotePieces())
	numOffline := len(offlineNodes)
	numSuccessful := len(successNodes)
	numFailed := len(failedNodes)
	numContained := len(containedNodes)
	numUnknown := len(unknownNodes)
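
	// note that nodes with unknown errors are tracked separately and are not part of totalAudited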
	totalAudited := numSuccessful + numFailed + numOffline + numContained
	auditedPercentage := float64(totalAudited) / float64(totalInPointer)
	offlinePercentage := float64(0)
	successfulPercentage := float64(0)
	failedPercentage := float64(0)
	containedPercentage := float64(0)
	unknownPercentage := float64(0)
	if totalAudited > 0 {
		offlinePercentage = float64(numOffline) / float64(totalAudited)
		successfulPercentage = float64(numSuccessful) / float64(totalAudited)
		failedPercentage = float64(numFailed) / float64(totalAudited)
		containedPercentage = float64(numContained) / float64(totalAudited)
		unknownPercentage = float64(numUnknown) / float64(totalAudited)
	}

	mon.Meter("audit_success_nodes_global").Mark(numSuccessful)        //locked
	mon.Meter("audit_fail_nodes_global").Mark(numFailed)               //locked
	mon.Meter("audit_offline_nodes_global").Mark(numOffline)           //locked
	mon.Meter("audit_contained_nodes_global").Mark(numContained)       //locked
	mon.Meter("audit_unknown_nodes_global").Mark(numUnknown)           //locked
	mon.Meter("audit_total_nodes_global").Mark(totalAudited)           //locked
	mon.Meter("audit_total_pointer_nodes_global").Mark(totalInPointer) //locked

	mon.IntVal("audit_success_nodes").Observe(int64(numSuccessful))        //locked
	mon.IntVal("audit_fail_nodes").Observe(int64(numFailed))               //locked
	mon.IntVal("audit_offline_nodes").Observe(int64(numOffline))           //locked
	mon.IntVal("audit_contained_nodes").Observe(int64(numContained))       //locked
	mon.IntVal("audit_unknown_nodes").Observe(int64(numUnknown))           //locked
	mon.IntVal("audit_total_nodes").Observe(int64(totalAudited))           //locked
	mon.IntVal("audit_total_pointer_nodes").Observe(int64(totalInPointer)) //locked

	mon.FloatVal("audited_percentage").Observe(auditedPercentage)             //locked
	mon.FloatVal("audit_offline_percentage").Observe(offlinePercentage)       //locked
	mon.FloatVal("audit_successful_percentage").Observe(successfulPercentage) //locked
	mon.FloatVal("audit_failed_percentage").Observe(failedPercentage)         //locked
	mon.FloatVal("audit_contained_percentage").Observe(containedPercentage)   //locked
	mon.FloatVal("audit_unknown_percentage").Observe(unknownPercentage)       //locked

	pendingAudits, err := createPendingAudits(ctx, containedNodes, correctedShares, pointer, randomIndex, path)
	if err != nil {
		return Report{
			Successes: successNodes,
			Fails:     failedNodes,
			Offlines:  offlineNodes,
			Unknown:   unknownNodes,
		}, err
	}

	return Report{
		Successes:     successNodes,
		Fails:         failedNodes,
		Offlines:      offlineNodes,
		PendingAudits: pendingAudits,
		Unknown:       unknownNodes,
	}, nil
}

// DownloadShares downloads shares from the nodes where remote pieces are located
func (verifier *Verifier) DownloadShares(ctx context.Context, limits []*pb.AddressedOrderLimit, piecePrivateKey storj.PiecePrivateKey, stripeIndex int64, shareSize int32) (shares map[int]Share, err error) {
	defer mon.Task()(&ctx)(&err)

	shares = make(map[int]Share, len(limits))
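
	// fetch each share in its own goroutine; a nil limit still sends a nil result
	// so that the collection loop below always reads exactly len(limits) values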
	ch := make(chan *Share, len(limits))
	for i, limit := range limits {
		if limit == nil {
			ch <- nil
			continue
		}

		go func(i int, limit *pb.AddressedOrderLimit) {
			share, err := verifier.GetShare(ctx, limit, piecePrivateKey, stripeIndex, shareSize, i)
			if err != nil {
				share = Share{
					Error:    err,
					PieceNum: i,
					NodeID:   limit.GetLimit().StorageNodeId,
					Data:     nil,
				}
			}
			ch <- &share
		}(i, limit)
	}

	for range limits {
		share := <-ch
		if share != nil {
			shares[share.PieceNum] = *share
		}
	}

	return shares, nil
}

// Reverify reverifies the contained nodes in the stripe
func (verifier *Verifier) Reverify(ctx context.Context, path storj.Path) (report Report, err error) {
	defer mon.Task()(&ctx)(&err)

	// result status enum
	const (
		skipped = iota
		success
		offline
		failed
		contained
		unknown
		erred
	)

	type result struct {
		nodeID       storj.NodeID
		status       int
		pendingAudit *PendingAudit
		err          error
	}

	pointerBytes, pointer, err := verifier.metainfo.GetWithBytes(ctx, path)
	if err != nil {
		if storj.ErrObjectNotFound.Has(err) {
			return Report{}, ErrSegmentDeleted.New("%q", path)
		}
		return Report{}, err
	}

	if pointer.ExpirationDate != (time.Time{}) && pointer.ExpirationDate.Before(time.Now().UTC()) {
		errDelete := verifier.metainfo.Delete(ctx, path, pointerBytes)
		if errDelete != nil {
			return Report{}, Error.Wrap(errDelete)
		}
		return Report{}, ErrSegmentExpired.New("Segment expired before Reverify")
	}
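
	// pieceHashesVerified is written by the per-node goroutines below and read in the
	// deferred cleanup, so all access is guarded by a mutex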
	pieceHashesVerified := make(map[storj.NodeID]bool)
	pieceHashesVerifiedMutex := &sync.Mutex{}
	defer func() {
		pieceHashesVerifiedMutex.Lock()

		// for each node in Fails and PendingAudits, remove if piece hashes not verified for that segment
		newFails := storj.NodeIDList{}
		newPendingAudits := []*PendingAudit{}

		for _, id := range report.Fails {
			if pieceHashesVerified[id] {
				newFails = append(newFails, id)
			}
		}
		for _, pending := range report.PendingAudits {
			if pieceHashesVerified[pending.NodeID] {
				newPendingAudits = append(newPendingAudits, pending)
			}
		}

		report.Fails = newFails
		report.PendingAudits = newPendingAudits

		pieceHashesVerifiedMutex.Unlock()
	}()

	pieces := pointer.GetRemote().GetRemotePieces()
	ch := make(chan result, len(pieces))
	var containedInSegment int64

	for _, piece := range pieces {
		pending, err := verifier.containment.Get(ctx, piece.NodeId)
		if err != nil {
			if ErrContainedNotFound.Has(err) {
				ch <- result{nodeID: piece.NodeId, status: skipped}
				continue
			}
			ch <- result{nodeID: piece.NodeId, status: erred, err: err}
			verifier.log.Debug("Reverify: error getting from containment db", zap.Binary("Segment", []byte(path)), zap.Stringer("Node ID", piece.NodeId), zap.Error(err))
			continue
		}
		containedInSegment++

		go func(pending *PendingAudit) {
			pendingPointerBytes, pendingPointer, err := verifier.metainfo.GetWithBytes(ctx, pending.Path)
			if err != nil {
				if storj.ErrObjectNotFound.Has(err) {
					// segment has been deleted since node was contained
					_, errDelete := verifier.containment.Delete(ctx, pending.NodeID)
					if errDelete != nil {
						verifier.log.Debug("Error deleting node from containment db", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(errDelete))
					}
					ch <- result{nodeID: pending.NodeID, status: skipped}
					return
				}

				ch <- result{nodeID: pending.NodeID, status: erred, err: err}
				verifier.log.Debug("Reverify: error getting pending pointer from metainfo", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(err))
				return
			}

			if pendingPointer.ExpirationDate != (time.Time{}) && pendingPointer.ExpirationDate.Before(time.Now().UTC()) {
				errDelete := verifier.metainfo.Delete(ctx, pending.Path, pendingPointerBytes)
				if errDelete != nil {
					verifier.log.Debug("Reverify: error deleting expired segment", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(errDelete))
				}
				_, errDelete = verifier.containment.Delete(ctx, pending.NodeID)
				if errDelete != nil {
					verifier.log.Debug("Error deleting node from containment db", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(errDelete))
				}
				verifier.log.Debug("Reverify: segment already expired", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID))
				ch <- result{nodeID: pending.NodeID, status: skipped}
				return
			}

			// set whether piece hashes have been verified for this segment so we know whether to report a failed or pending audit for this node
			pieceHashesVerifiedMutex.Lock()
			pieceHashesVerified[pending.NodeID] = pendingPointer.PieceHashesVerified
			pieceHashesVerifiedMutex.Unlock()

			if pendingPointer.GetRemote().RootPieceId != pending.PieceID {
				// segment has changed since initial containment
				_, errDelete := verifier.containment.Delete(ctx, pending.NodeID)
				if errDelete != nil {
					verifier.log.Debug("Error deleting node from containment db", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(errDelete))
				}
				ch <- result{nodeID: pending.NodeID, status: skipped}
				return
			}

			var pieceNum int32
			found := false
			for _, piece := range pendingPointer.GetRemote().GetRemotePieces() {
				if piece.NodeId == pending.NodeID {
					pieceNum = piece.PieceNum
					found = true
				}
			}
			if !found {
				// node is no longer in pointer, so remove from containment
				_, errDelete := verifier.containment.Delete(ctx, pending.NodeID)
				if errDelete != nil {
					verifier.log.Debug("Error deleting node from containment db", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(errDelete))
				}
				ch <- result{nodeID: pending.NodeID, status: skipped}
				return
			}

			limit, piecePrivateKey, err := verifier.orders.CreateAuditOrderLimit(ctx, createBucketID(pending.Path), pending.NodeID, pieceNum, pending.PieceID, pending.ShareSize)
			if err != nil {
				if overlay.ErrNodeDisqualified.Has(err) {
					_, errDelete := verifier.containment.Delete(ctx, pending.NodeID)
					if errDelete != nil {
						verifier.log.Debug("Error deleting disqualified node from containment db", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(errDelete))
					}
					ch <- result{nodeID: pending.NodeID, status: erred, err: err}
					verifier.log.Debug("Reverify: order limit not created (disqualified)", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID))
					return
				}
				if overlay.ErrNodeOffline.Has(err) {
					ch <- result{nodeID: pending.NodeID, status: offline}
					verifier.log.Debug("Reverify: order limit not created (offline)", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID))
					return
				}
				ch <- result{nodeID: pending.NodeID, status: erred, err: err}
				verifier.log.Debug("Reverify: error creating order limit", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(err))
				return
			}

			share, err := verifier.GetShare(ctx, limit, piecePrivateKey, pending.StripeIndex, pending.ShareSize, int(pieceNum))

			// check if the pending audit was deleted while downloading the share
			_, getErr := verifier.containment.Get(ctx, pending.NodeID)
			if getErr != nil {
				if ErrContainedNotFound.Has(getErr) {
					ch <- result{nodeID: pending.NodeID, status: skipped}
					verifier.log.Debug("Reverify: pending audit deleted during reverification", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(getErr))
					return
				}
				ch <- result{nodeID: pending.NodeID, status: erred, err: getErr}
				verifier.log.Debug("Reverify: error getting from containment db", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(getErr))
				return
			}

			// analyze the error from GetShare
			if err != nil {
				if rpc.Error.Has(err) {
					if errs.Is(err, context.DeadlineExceeded) {
						// dial timeout
						ch <- result{nodeID: pending.NodeID, status: offline}
						verifier.log.Debug("Reverify: dial timeout (offline)", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(err))
						return
					}
					if errs2.IsRPC(err, rpcstatus.Unknown) {
						// dial failed -- offline node
						verifier.log.Debug("Reverify: dial failed (offline)", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(err))
						ch <- result{nodeID: pending.NodeID, status: offline}
						return
					}
					// unknown transport error
					ch <- result{nodeID: pending.NodeID, status: unknown, pendingAudit: pending}
					verifier.log.Debug("Reverify: unknown transport error (skipped)", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(err))
					return
				}
				if errs2.IsRPC(err, rpcstatus.NotFound) {
					// Get the original segment pointer in the metainfo
					_, err := verifier.checkIfSegmentAltered(ctx, pending.Path, pendingPointer)
					if err != nil {
						ch <- result{nodeID: pending.NodeID, status: success}
						verifier.log.Debug("Reverify: audit source deleted before reverification", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(err))
						return
					}
					// missing share
					ch <- result{nodeID: pending.NodeID, status: failed}
					verifier.log.Debug("Reverify: piece not found (audit failed)", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(err))
					return
				}
				if errs2.IsRPC(err, rpcstatus.DeadlineExceeded) {
					// dial successful, but download timed out
					ch <- result{nodeID: pending.NodeID, status: contained, pendingAudit: pending}
					verifier.log.Debug("Reverify: download timeout (contained)", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(err))
					return
				}
				// unknown error
				ch <- result{nodeID: pending.NodeID, status: unknown, pendingAudit: pending}
				verifier.log.Debug("Reverify: unknown error (skipped)", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(err))
				return
			}

			downloadedHash := pkcrypto.SHA256Hash(share.Data)
			if bytes.Equal(downloadedHash, pending.ExpectedShareHash) {
				ch <- result{nodeID: pending.NodeID, status: success}
				verifier.log.Debug("Reverify: hashes match (audit success)", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID))
			} else {
				_, err := verifier.checkIfSegmentAltered(ctx, pending.Path, pendingPointer)
				if err != nil {
					ch <- result{nodeID: pending.NodeID, status: success}
					verifier.log.Debug("Reverify: audit source deleted before reverification", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID), zap.Error(err))
					return
				}
				verifier.log.Debug("Reverify: hashes mismatch (audit failed)", zap.Binary("Segment", []byte(pending.Path)), zap.Stringer("Node ID", pending.NodeID),
					zap.Binary("expected hash", pending.ExpectedShareHash), zap.Binary("downloaded hash", downloadedHash))
				ch <- result{nodeID: pending.NodeID, status: failed}
			}
		}(pending)
	}
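
	// collect exactly one result per remote piece; goroutines that skip a node still send
	// a result, so this loop always terminates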
	for range pieces {
		result := <-ch
		switch result.status {
		case success:
			report.Successes = append(report.Successes, result.nodeID)
		case offline:
			report.Offlines = append(report.Offlines, result.nodeID)
		case failed:
			report.Fails = append(report.Fails, result.nodeID)
		case contained:
			report.PendingAudits = append(report.PendingAudits, result.pendingAudit)
		case unknown:
			report.Unknown = append(report.Unknown, result.nodeID)
		case erred:
			err = errs.Combine(err, result.err)
		}
	}

	mon.Meter("reverify_successes_global").Mark(len(report.Successes))     //locked
	mon.Meter("reverify_offlines_global").Mark(len(report.Offlines))       //locked
	mon.Meter("reverify_fails_global").Mark(len(report.Fails))             //locked
	mon.Meter("reverify_contained_global").Mark(len(report.PendingAudits)) //locked
	mon.Meter("reverify_unknown_global").Mark(len(report.Unknown))         //locked

	mon.IntVal("reverify_successes").Observe(int64(len(report.Successes)))     //locked
	mon.IntVal("reverify_offlines").Observe(int64(len(report.Offlines)))       //locked
	mon.IntVal("reverify_fails").Observe(int64(len(report.Fails)))             //locked
	mon.IntVal("reverify_contained").Observe(int64(len(report.PendingAudits))) //locked
	mon.IntVal("reverify_unknown").Observe(int64(len(report.Unknown)))         //locked

	mon.IntVal("reverify_contained_in_segment").Observe(containedInSegment) //locked
	mon.IntVal("reverify_total_in_segment").Observe(int64(len(pieces)))     //locked

	return report, err
}

// GetShare uses the piece store client to download a share from a storage node
func (verifier *Verifier) GetShare(ctx context.Context, limit *pb.AddressedOrderLimit, piecePrivateKey storj.PiecePrivateKey, stripeIndex int64, shareSize int32, pieceNum int) (share Share, err error) {
	defer mon.Task()(&ctx)(&err)

	bandwidthMsgSize := shareSize

	// determines number of seconds allotted for receiving data from a storage node
	timedCtx := ctx
	if verifier.minBytesPerSecond > 0 {
		maxTransferTime := time.Duration(int64(time.Second) * int64(bandwidthMsgSize) / verifier.minBytesPerSecond.Int64())
		if maxTransferTime < verifier.minDownloadTimeout {
			maxTransferTime = verifier.minDownloadTimeout
		}
		var cancel func()
		timedCtx, cancel = context.WithTimeout(ctx, maxTransferTime)
		defer cancel()
	}

	storageNodeID := limit.GetLimit().StorageNodeId
	log := verifier.log.Named(storageNodeID.String())
	target := &pb.Node{Id: storageNodeID, Address: limit.GetStorageNodeAddress()}

	ps, err := piecestore.Dial(timedCtx, verifier.dialer, target, log, piecestore.DefaultConfig)
	if err != nil {
		return Share{}, Error.Wrap(err)
	}
	defer func() {
		err := ps.Close()
		if err != nil {
			verifier.log.Error("audit verifier failed to close conn to node", zap.Error(err))
		}
	}()
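
	// each stripe contributes one erasure share of shareSize bytes to every piece,
	// so the requested share starts at shareSize * stripeIndex within the piece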
	offset := int64(shareSize) * stripeIndex

	downloader, err := ps.Download(timedCtx, limit.GetLimit(), piecePrivateKey, offset, int64(shareSize))
	if err != nil {
		return Share{}, err
	}
	defer func() { err = errs.Combine(err, downloader.Close()) }()

	buf := make([]byte, shareSize)
	_, err = io.ReadFull(downloader, buf)
	if err != nil {
		return Share{}, err
	}

	return Share{
		Error:    nil,
		PieceNum: pieceNum,
		NodeID:   storageNodeID,
		Data:     buf,
	}, nil
}

// checkIfSegmentAltered checks if path's pointer has been altered since path was selected.
func (verifier *Verifier) checkIfSegmentAltered(ctx context.Context, segmentPath string, oldPointer *pb.Pointer) (newPointer *pb.Pointer, err error) {
	defer mon.Task()(&ctx)(&err)

	if verifier.OnTestingCheckSegmentAlteredHook != nil {
		verifier.OnTestingCheckSegmentAlteredHook()
	}

	newPointer, err = verifier.metainfo.Get(ctx, segmentPath)
	if err != nil {
		if storj.ErrObjectNotFound.Has(err) {
			return nil, ErrSegmentDeleted.New("%q", segmentPath)
		}
		return nil, err
	}
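
	// a different creation date means the original pointer was deleted and a new one
	// written in its place, so treat the segment as deleted for auditing purposes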
	if oldPointer != nil && oldPointer.CreationDate != newPointer.CreationDate {
		return nil, ErrSegmentDeleted.New("%q", segmentPath)
	}
	return newPointer, nil
}

// auditShares takes the downloaded shares and uses infectious's Correct function to check that they
// haven't been altered. auditShares returns a slice containing the piece numbers of altered shares,
// and a slice of the corrected shares.
func auditShares(ctx context.Context, required, total int, originals map[int]Share) (pieceNums []int, corrected []infectious.Share, err error) {
	defer mon.Task()(&ctx)(&err)

	f, err := infectious.NewFEC(required, total)
	if err != nil {
		return nil, nil, err
	}

	copies, err := makeCopies(ctx, originals)
	if err != nil {
		return nil, nil, err
	}
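
	// Correct repairs the copied shares in place; any share whose corrected data no longer
	// matches the original download must have been altered by the storage node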
	err = f.Correct(copies)
	if err != nil {
		return nil, nil, err
	}

	for _, share := range copies {
		if !bytes.Equal(originals[share.Number].Data, share.Data) {
			pieceNums = append(pieceNums, share.Number)
		}
	}

	return pieceNums, copies, nil
}

// makeCopies takes in a map of audit Shares and deep copies their data to a slice of infectious Shares
func makeCopies(ctx context.Context, originals map[int]Share) (copies []infectious.Share, err error) {
	defer mon.Task()(&ctx)(&err)

	copies = make([]infectious.Share, 0, len(originals))
	for _, original := range originals {
		copies = append(copies, infectious.Share{
			Data:   append([]byte{}, original.Data...),
			Number: original.PieceNum})
	}
	return copies, nil
}

// getOfflineNodes returns those storage nodes from the pointer that have no order limit
// and are not in the skip list.
func getOfflineNodes(pointer *pb.Pointer, limits []*pb.AddressedOrderLimit, skip map[storj.NodeID]bool) storj.NodeIDList {
	var offlines storj.NodeIDList

	nodesWithLimit := make(map[storj.NodeID]bool, len(limits))
	for _, limit := range limits {
		if limit != nil {
			nodesWithLimit[limit.GetLimit().StorageNodeId] = true
		}
	}

	for _, piece := range pointer.GetRemote().GetRemotePieces() {
		if !nodesWithLimit[piece.NodeId] && !skip[piece.NodeId] {
			offlines = append(offlines, piece.NodeId)
		}
	}

	return offlines
}

// getSuccessNodes uses the failed, offline, unknown and contained node lists to determine which nodes passed the audit
func getSuccessNodes(ctx context.Context, shares map[int]Share, failedNodes, offlineNodes, unknownNodes storj.NodeIDList, containedNodes map[int]storj.NodeID) (successNodes storj.NodeIDList) {
	defer mon.Task()(&ctx)(nil)

	fails := make(map[storj.NodeID]bool)
	for _, fail := range failedNodes {
		fails[fail] = true
	}
	for _, offline := range offlineNodes {
		fails[offline] = true
	}
	for _, unknown := range unknownNodes {
		fails[unknown] = true
	}
	for _, contained := range containedNodes {
		fails[contained] = true
	}

	for _, share := range shares {
		if !fails[share.NodeID] {
			successNodes = append(successNodes, share.NodeID)
		}
	}

	return successNodes
}

func createBucketID(path storj.Path) []byte {
	comps := storj.SplitPath(path)
	if len(comps) < 3 {
		return nil
	}
	// project_id/bucket_name
	return []byte(storj.JoinPaths(comps[0], comps[2]))
}

func createPendingAudits(ctx context.Context, containedNodes map[int]storj.NodeID, correctedShares []infectious.Share, pointer *pb.Pointer, randomIndex int64, path storj.Path) (pending []*PendingAudit, err error) {
	defer mon.Task()(&ctx)(&err)

	if len(containedNodes) == 0 {
		return nil, nil
	}

	redundancy := pointer.GetRemote().GetRedundancy()
	required := int(redundancy.GetMinReq())
	total := int(redundancy.GetTotal())
	shareSize := redundancy.GetErasureShareSize()

	fec, err := infectious.NewFEC(required, total)
	if err != nil {
		return nil, Error.Wrap(err)
	}

	stripeData, err := rebuildStripe(ctx, fec, correctedShares, int(shareSize))
	if err != nil {
		return nil, Error.Wrap(err)
	}
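
	// for each contained node, re-encode the share its piece should hold at this stripe and
	// store the expected hash; Reverify later compares the downloaded share against this hash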
	for pieceNum, nodeID := range containedNodes {
		share := make([]byte, shareSize)
		err = fec.EncodeSingle(stripeData, share, pieceNum)
		if err != nil {
			return nil, Error.Wrap(err)
		}
		pending = append(pending, &PendingAudit{
			NodeID:            nodeID,
			PieceID:           pointer.GetRemote().RootPieceId,
			StripeIndex:       randomIndex,
			ShareSize:         shareSize,
			ExpectedShareHash: pkcrypto.SHA256Hash(share),
			Path:              path,
		})
	}

	return pending, nil
}

func rebuildStripe(ctx context.Context, fec *infectious.FEC, corrected []infectious.Share, shareSize int) (_ []byte, err error) {
	defer mon.Task()(&ctx)(&err)

	stripe := make([]byte, fec.Required()*shareSize)
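
	// Rebuild invokes the callback once per recovered data share; copying each share into
	// its offset by share number reassembles the original stripe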
	err = fec.Rebuild(corrected, func(share infectious.Share) {
		copy(stripe[share.Number*shareSize:], share.Data)
	})
	if err != nil {
		return nil, err
	}
	return stripe, nil
}

// GetRandomStripe takes a pointer and returns a random stripe index within that pointer.
func GetRandomStripe(ctx context.Context, pointer *pb.Pointer) (index int64, err error) {
	defer mon.Task()(&ctx)(&err)

	redundancy, err := eestream.NewRedundancyStrategyFromProto(pointer.GetRemote().GetRedundancy())
	if err != nil {
		return 0, err
	}

	// the last segment could be smaller than stripe size
	if pointer.GetSegmentSize() < int64(redundancy.StripeSize()) {
		return 0, nil
	}
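
	// pick a uniformly random stripe index in [0, segmentSize/stripeSize)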
	var src cryptoSource
	rnd := rand.New(src)
	numStripes := pointer.GetSegmentSize() / int64(redundancy.StripeSize())
	randomStripeIndex := rnd.Int63n(numStripes)

	return randomStripeIndex, nil
}