// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.

package piecestore

import (
	"context"
	"fmt"
	"io"
	"net"
	"os"
	"sync"
	"sync/atomic"
	"time"

	"github.com/spacemonkeygo/monkit/v3"
	"github.com/zeebo/errs"
	"go.uber.org/zap"
	"golang.org/x/sync/errgroup"

	"storj.io/common/bloomfilter"
	"storj.io/common/context2"
	"storj.io/common/errs2"
	"storj.io/common/identity"
	"storj.io/common/memory"
	"storj.io/common/pb"
	"storj.io/common/rpc/rpcstatus"
	"storj.io/common/rpc/rpctimeout"
	"storj.io/common/signing"
	"storj.io/common/storj"
	"storj.io/common/sync2"
	"storj.io/drpc"
	"storj.io/drpc/drpcctx"
	"storj.io/storj/storagenode/bandwidth"
	"storj.io/storj/storagenode/monitor"
	"storj.io/storj/storagenode/orders"
	"storj.io/storj/storagenode/orders/ordersfile"
	"storj.io/storj/storagenode/pieces"
	"storj.io/storj/storagenode/piecestore/usedserials"
	"storj.io/storj/storagenode/retain"
	"storj.io/storj/storagenode/trust"
)

var (
	mon = monkit.Package()
)
// OldConfig contains everything necessary for a server.
type OldConfig struct {
	Path                   string         `help:"path to store data in" default:"$CONFDIR/storage"`
	WhitelistedSatellites  storj.NodeURLs `help:"a comma-separated list of approved satellite node urls (unused)" devDefault:"" releaseDefault:""`
	AllocatedDiskSpace     memory.Size    `user:"true" help:"total allocated disk space in bytes" default:"1TB"`
	AllocatedBandwidth     memory.Size    `user:"true" help:"total allocated bandwidth in bytes (deprecated)" default:"0B"`
	KBucketRefreshInterval time.Duration  `help:"how frequently Kademlia bucket should be refreshed with node stats" default:"1h0m0s"`
}
// Config defines parameters for piecestore endpoint.
type Config struct {
	DatabaseDir             string        `help:"directory to store databases. if empty, uses data path" default:""`
	ExpirationGracePeriod   time.Duration `help:"how soon before expiration date should things be considered expired" default:"48h0m0s"`
	MaxConcurrentRequests   int           `help:"how many concurrent requests are allowed, before uploads are rejected. 0 represents unlimited." default:"0"`
	DeleteWorkers           int           `help:"how many piece delete workers" default:"1"`
	DeleteQueueSize         int           `help:"size of the piece delete queue" default:"10000"`
	ExistsCheckWorkers      int           `help:"how many workers to use to check if satellite pieces exists" default:"5"`
	OrderLimitGracePeriod   time.Duration `help:"how long after OrderLimit creation date are OrderLimits no longer accepted" default:"1h0m0s"`
	CacheSyncInterval       time.Duration `help:"how often the space used cache is synced to persistent storage" releaseDefault:"1h0m0s" devDefault:"0h1m0s"`
	PieceScanOnStartup      bool          `help:"if set to true, all pieces disk usage is recalculated on startup" default:"true"`
	StreamOperationTimeout  time.Duration `help:"how long to spend waiting for a stream operation before canceling" default:"30m"`
	RetainTimeBuffer        time.Duration `help:"allows for small differences in the satellite and storagenode clocks" default:"48h0m0s"`
	ReportCapacityThreshold memory.Size   `help:"threshold below which to immediately notify satellite of capacity" default:"500MB" hidden:"true"`
	MaxUsedSerialsSize      memory.Size   `help:"amount of memory allowed for used serials store - once surpassed, serials will be dropped at random" default:"1MB"`

	MinUploadSpeed                    memory.Size   `help:"a client upload speed should not be lower than MinUploadSpeed in bytes-per-second (E.g: 1Mb), otherwise, it will be flagged as slow-connection and potentially be closed" default:"0Mb"`
	MinUploadSpeedGraceDuration       time.Duration `help:"if MinUploadSpeed is configured, after a period of time after the client initiated the upload, the server will flag unusually slow upload client" default:"0h0m10s"`
	MinUploadSpeedCongestionThreshold float64       `help:"if the portion defined by the total number of alive connection per MaxConcurrentRequest reaches this threshold, a slow upload client will no longer be monitored and flagged" default:"0.8"`

	Trust   trust.Config
	Monitor monitor.Config
	Orders  orders.Config
}
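
// Note: Config is wired into the storagenode configuration, so the fields
// above surface as flags / config.yaml entries. As an assumption about the
// wiring outside this file, they are typically mounted under the "storage2"
// prefix, e.g. --storage2.max-concurrent-requests=10.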

// pingStatsSource is informed whenever the endpoint handles a request, so
// that the node's last-contact statistics stay up to date.
type pingStatsSource interface {
	WasPinged(when time.Time)
}

// Endpoint implements uploading, downloading and deleting for a storage node.
//
// architecture: Endpoint
type Endpoint struct {
	pb.DRPCContactUnimplementedServer

	log    *zap.Logger
	config Config

	signer    signing.Signer
	trust     *trust.Pool
	monitor   *monitor.Service
	retain    *retain.Service
	pingStats pingStatsSource

	store        *pieces.Store
	trashChore   *pieces.TrashChore
	ordersStore  *orders.FileStore
	usage        bandwidth.DB
	usedSerials  *usedserials.Table
	pieceDeleter *pieces.Deleter

	liveRequests int32
}

// NewEndpoint creates a new piecestore endpoint.
func NewEndpoint(log *zap.Logger, signer signing.Signer, trust *trust.Pool, monitor *monitor.Service, retain *retain.Service, pingStats pingStatsSource, store *pieces.Store, trashChore *pieces.TrashChore, pieceDeleter *pieces.Deleter, ordersStore *orders.FileStore, usage bandwidth.DB, usedSerials *usedserials.Table, config Config) (*Endpoint, error) {
	return &Endpoint{
		log:    log,
		config: config,

		signer:    signer,
		trust:     trust,
		monitor:   monitor,
		retain:    retain,
		pingStats: pingStats,

		store:        store,
		trashChore:   trashChore,
		ordersStore:  ordersStore,
		usage:        usage,
		usedSerials:  usedSerials,
		pieceDeleter: pieceDeleter,

		liveRequests: 0,
	}, nil
}

// monLiveRequests is a monkit task used to track live (in-flight) requests.
var monLiveRequests = mon.TaskNamed("live-request")

// Delete handles deleting a piece on the piece store, as requested by an uplink.
//
// Deprecated: use DeletePieces instead.
func (endpoint *Endpoint) Delete(ctx context.Context, delete *pb.PieceDeleteRequest) (_ *pb.PieceDeleteResponse, err error) {
	defer monLiveRequests(&ctx)(&err)
	defer mon.Task()(&ctx)(&err)

	atomic.AddInt32(&endpoint.liveRequests, 1)
	defer atomic.AddInt32(&endpoint.liveRequests, -1)

	endpoint.pingStats.WasPinged(time.Now())

	if delete.Limit.Action != pb.PieceAction_DELETE {
		return nil, rpcstatus.Errorf(rpcstatus.InvalidArgument,
			"expected delete action got %v", delete.Limit.Action)
	}

	if err := endpoint.verifyOrderLimit(ctx, delete.Limit); err != nil {
		return nil, rpcstatus.Wrap(rpcstatus.Unauthenticated, err)
	}

	remoteAddrLogField := zap.String("Remote Address", getRemoteAddr(ctx))

	if err := endpoint.store.Delete(ctx, delete.Limit.SatelliteId, delete.Limit.PieceId); err != nil {
		// the error is explicitly ignored here and only logged
		// TODO: https://storjlabs.atlassian.net/browse/V3-3222
		// report rpc status of internal server error or not found error,
		// e.g. not found might happen when we get a deletion request after garbage
		// collection has deleted it
		endpoint.log.Error("delete failed", zap.Stringer("Satellite ID", delete.Limit.SatelliteId), zap.Stringer("Piece ID", delete.Limit.PieceId), remoteAddrLogField, zap.Error(err))
	} else {
		endpoint.log.Info("deleted", zap.Stringer("Satellite ID", delete.Limit.SatelliteId), zap.Stringer("Piece ID", delete.Limit.PieceId), remoteAddrLogField)
	}

	return &pb.PieceDeleteResponse{}, nil
}

// DeletePieces deletes a list of pieces on satellite request.
func (endpoint *Endpoint) DeletePieces(
	ctx context.Context, req *pb.DeletePiecesRequest,
) (_ *pb.DeletePiecesResponse, err error) {
	defer mon.Task()(&ctx, req.PieceIds)(&err)

	peer, err := identity.PeerIdentityFromContext(ctx)
	if err != nil {
		return nil, rpcstatus.Wrap(rpcstatus.Unauthenticated, err)
	}

	err = endpoint.trust.VerifySatelliteID(ctx, peer.ID)
	if err != nil {
		return nil, rpcstatus.Error(rpcstatus.PermissionDenied, "delete pieces called with untrusted ID")
	}

	unhandled := endpoint.pieceDeleter.Enqueue(ctx, peer.ID, req.PieceIds)

	return &pb.DeletePiecesResponse{
		UnhandledCount: int64(unhandled),
	}, nil
}

// Exists checks if the pieces from the list exist on the storage node. The
// request is accepted only from a trusted satellite, and pieces are checked
// only for that satellite.
func (endpoint *Endpoint) Exists(
	ctx context.Context, req *pb.ExistsRequest,
) (_ *pb.ExistsResponse, err error) {
	defer mon.Task()(&ctx)(&err)

	peer, err := identity.PeerIdentityFromContext(ctx)
	if err != nil {
		return nil, rpcstatus.Wrap(rpcstatus.Unauthenticated, err)
	}

	err = endpoint.trust.VerifySatelliteID(ctx, peer.ID)
	if err != nil {
		return nil, rpcstatus.Error(rpcstatus.PermissionDenied, "piecestore.exists called with untrusted ID")
	}

	if len(req.PieceIds) == 0 {
		return &pb.ExistsResponse{}, nil
	}

	if endpoint.config.ExistsCheckWorkers < 1 {
		endpoint.config.ExistsCheckWorkers = 1
	}

	limiter := sync2.NewLimiter(endpoint.config.ExistsCheckWorkers)
	var mu sync.Mutex

	missing := make([]uint32, 0, 100)

	addMissing := func(index int) {
		mu.Lock()
		defer mu.Unlock()

		missing = append(missing, uint32(index))
	}
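
	// Stat each requested piece using a bounded worker pool of
	// ExistsCheckWorkers goroutines. A piece whose Stat fails with
	// os.ErrNotExist is recorded by its index within req.PieceIds; other stat
	// errors are only logged and the piece is not reported as missing.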
	for index, pieceID := range req.PieceIds {
		index := index
		pieceID := pieceID

		ok := limiter.Go(ctx, func() {
			_, err := endpoint.store.Stat(ctx, peer.ID, pieceID)
			if err != nil {
				if errs.Is(err, os.ErrNotExist) {
					addMissing(index)
				}

				endpoint.log.Debug("failed to stat piece", zap.String("Piece ID", pieceID.String()), zap.String("Satellite ID", peer.ID.String()), zap.Error(err))
				return
			}
		})
		if !ok {
			limiter.Wait()
			return nil, rpcstatus.Wrap(rpcstatus.Canceled, ctx.Err())
		}
	}
	limiter.Wait()

	return &pb.ExistsResponse{
		Missing: missing,
	}, nil
}

// Upload handles uploading a piece on piece store.
func (endpoint *Endpoint) Upload(stream pb.DRPCPiecestore_UploadStream) (err error) {
	ctx := stream.Context()

	defer monLiveRequests(&ctx)(&err)
	defer mon.Task()(&ctx)(&err)

	liveRequests := atomic.AddInt32(&endpoint.liveRequests, 1)
	defer atomic.AddInt32(&endpoint.liveRequests, -1)

	endpoint.pingStats.WasPinged(time.Now())

	if endpoint.config.MaxConcurrentRequests > 0 && int(liveRequests) > endpoint.config.MaxConcurrentRequests {
		endpoint.log.Error("upload rejected, too many requests",
			zap.Int32("live requests", liveRequests),
			zap.Int("requestLimit", endpoint.config.MaxConcurrentRequests),
		)
		errMsg := fmt.Sprintf("storage node overloaded, request limit: %d", endpoint.config.MaxConcurrentRequests)
		return rpcstatus.Error(rpcstatus.Unavailable, errMsg)
	}

	startTime := time.Now().UTC()

	// TODO: set maximum message size

	// N.B.: we are only allowed to use message if the returned error is nil. it would be
	// a race condition otherwise as Run does not wait for the closure to exit.
	var message *pb.PieceUploadRequest
	err = rpctimeout.Run(ctx, endpoint.config.StreamOperationTimeout, func(_ context.Context) (err error) {
		message, err = stream.Recv()
		return err
	})
	switch {
	case err != nil:
		return rpcstatus.Wrap(rpcstatus.Internal, err)
	case message == nil:
		return rpcstatus.Error(rpcstatus.InvalidArgument, "expected a message")
	case message.Limit == nil:
		return rpcstatus.Error(rpcstatus.InvalidArgument, "expected order limit as the first message")
	}
	limit := message.Limit
	hashAlgorithm := message.HashAlgorithm

	if limit.Action != pb.PieceAction_PUT && limit.Action != pb.PieceAction_PUT_REPAIR {
		return rpcstatus.Errorf(rpcstatus.InvalidArgument, "expected put or put repair action got %v", limit.Action)
	}

	if err := endpoint.verifyOrderLimit(ctx, limit); err != nil {
		return err
	}

	availableSpace, err := endpoint.monitor.AvailableSpace(ctx)
	if err != nil {
		return rpcstatus.Wrap(rpcstatus.Internal, err)
	}

	// if availableSpace has fallen below ReportCapacityThreshold, report capacity to satellites
	defer func() {
		if availableSpace < endpoint.config.ReportCapacityThreshold.Int64() {
			endpoint.monitor.NotifyLowDisk()
		}
	}()

	if availableSpace < limit.Limit {
		return rpcstatus.Errorf(rpcstatus.Aborted, "not enough available disk space, have: %v, need: %v", availableSpace, limit.Limit)
	}

	remoteAddrLogField := zap.String("Remote Address", getRemoteAddr(ctx))

	var pieceWriter *pieces.Writer
	// committed is set to true when the piece is committed.
	// It is used to distinguish pieces that were fully committed before the uplink
	// canceled the connection from pieces that were actually canceled before being completed.
	var committed bool
	defer func() {
		endTime := time.Now().UTC()
		dt := endTime.Sub(startTime)
		uploadSize := int64(0)
		if pieceWriter != nil {
			uploadSize = pieceWriter.Size()
		}
		uploadRate := float64(0)
		if dt.Seconds() > 0 {
			uploadRate = float64(uploadSize) / dt.Seconds()
		}
		uploadDuration := dt.Nanoseconds()

		if err != nil && !errs2.IsCanceled(err) {
			mon.Counter("upload_failure_count").Inc(1)
			mon.Meter("upload_failure_byte_meter").Mark64(uploadSize)
			mon.IntVal("upload_failure_size_bytes").Observe(uploadSize)
			mon.IntVal("upload_failure_duration_ns").Observe(uploadDuration)
			mon.FloatVal("upload_failure_rate_bytes_per_sec").Observe(uploadRate)
			endpoint.log.Error("upload failed", zap.Stringer("Piece ID", limit.PieceId), zap.Stringer("Satellite ID", limit.SatelliteId), zap.Stringer("Action", limit.Action), zap.Error(err), zap.Int64("Size", uploadSize), remoteAddrLogField)
		} else if (errs2.IsCanceled(err) || drpc.ClosedError.Has(err)) && !committed {
			mon.Counter("upload_cancel_count").Inc(1)
			mon.Meter("upload_cancel_byte_meter").Mark64(uploadSize)
			mon.IntVal("upload_cancel_size_bytes").Observe(uploadSize)
			mon.IntVal("upload_cancel_duration_ns").Observe(uploadDuration)
			mon.FloatVal("upload_cancel_rate_bytes_per_sec").Observe(uploadRate)
			endpoint.log.Info("upload canceled", zap.Stringer("Piece ID", limit.PieceId), zap.Stringer("Satellite ID", limit.SatelliteId), zap.Stringer("Action", limit.Action), zap.Int64("Size", uploadSize), remoteAddrLogField)
		} else {
			mon.Counter("upload_success_count").Inc(1)
			mon.Meter("upload_success_byte_meter").Mark64(uploadSize)
			mon.IntVal("upload_success_size_bytes").Observe(uploadSize)
			mon.IntVal("upload_success_duration_ns").Observe(uploadDuration)
			mon.FloatVal("upload_success_rate_bytes_per_sec").Observe(uploadRate)
			endpoint.log.Info("uploaded", zap.Stringer("Piece ID", limit.PieceId), zap.Stringer("Satellite ID", limit.SatelliteId), zap.Stringer("Action", limit.Action), zap.Int64("Size", uploadSize), remoteAddrLogField)
		}
	}()

	endpoint.log.Info("upload started",
		zap.Stringer("Piece ID", limit.PieceId),
		zap.Stringer("Satellite ID", limit.SatelliteId),
		zap.Stringer("Action", limit.Action),
		zap.Int64("Available Space", availableSpace),
		remoteAddrLogField)
	mon.Counter("upload_started_count").Inc(1)

	pieceWriter, err = endpoint.store.Writer(ctx, limit.SatelliteId, limit.PieceId, hashAlgorithm)
	if err != nil {
		return rpcstatus.Wrap(rpcstatus.Internal, err)
	}
	defer func() {
		// cancel the piece write if it hasn't been committed
		if cancelErr := pieceWriter.Cancel(ctx); cancelErr != nil {
			if errs2.IsCanceled(cancelErr) {
				return
			}
			endpoint.log.Error("error during canceling a piece write", zap.Error(cancelErr))
		}
	}()

	// Ensure that the order is saved, via the deferred commit below, even in
	// the face of an error.
	commitOrderToStore, err := endpoint.beginSaveOrder(limit)
	if err != nil {
		return rpcstatus.Wrap(rpcstatus.InvalidArgument, err)
	}
	largestOrder := pb.Order{}
	defer commitOrderToStore(ctx, &largestOrder)

	// monitor speed of upload client to flag slow uploads.
	speedEstimate := speedEstimation{
		grace: endpoint.config.MinUploadSpeedGraceDuration,
		limit: endpoint.config.MinUploadSpeed,
	}
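
	// Note (inferred from the call in the loop below rather than from the
	// speedEstimation implementation, which lives elsewhere): EnsureLimit is
	// expected to abort the upload once the observed transfer rate has stayed
	// below MinUploadSpeed past the grace period while the node is not
	// congested (see isCongested).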

	for {
		if err := speedEstimate.EnsureLimit(memory.Size(pieceWriter.Size()), endpoint.isCongested(), time.Now()); err != nil {
			return rpcstatus.Wrap(rpcstatus.Aborted, err)
		}

		// TODO: reuse messages to avoid allocations
		// N.B.: we are only allowed to use message if the returned error is nil. it would be
		// a race condition otherwise as Run does not wait for the closure to exit.
		err = rpctimeout.Run(ctx, endpoint.config.StreamOperationTimeout, func(_ context.Context) (err error) {
			message, err = stream.Recv()
			return err
		})
		if errs.Is(err, io.EOF) {
			return rpcstatus.Error(rpcstatus.InvalidArgument, "unexpected EOF")
		} else if err != nil {
			return rpcstatus.Wrap(rpcstatus.Internal, err)
		}
		if message == nil {
			return rpcstatus.Error(rpcstatus.InvalidArgument, "expected a message")
		}
		if message.Order == nil && message.Chunk == nil && message.Done == nil {
			return rpcstatus.Error(rpcstatus.InvalidArgument, "expected a message")
		}

		if message.Order != nil {
			if err := endpoint.VerifyOrder(ctx, limit, message.Order, largestOrder.Amount); err != nil {
				return err
			}
			largestOrder = *message.Order
		}

		if message.Chunk != nil {
			if message.Chunk.Offset != pieceWriter.Size() {
				return rpcstatus.Error(rpcstatus.InvalidArgument, "chunk out of order")
			}

			chunkSize := int64(len(message.Chunk.Data))
			if largestOrder.Amount < pieceWriter.Size()+chunkSize {
				// TODO: should we write the chunk anyway and give the uplink a chance to remedy the situation?
				return rpcstatus.Errorf(rpcstatus.InvalidArgument,
					"not enough allocated, allocated=%v writing=%v",
					largestOrder.Amount, pieceWriter.Size()+int64(len(message.Chunk.Data)))
			}

			availableSpace -= chunkSize
			if availableSpace < 0 {
				return rpcstatus.Error(rpcstatus.Internal, "out of space")
			}

			if _, err := pieceWriter.Write(message.Chunk.Data); err != nil {
				return rpcstatus.Wrap(rpcstatus.Internal, err)
			}
		}

		if message.Done != nil {
			if message.Done.HashAlgorithm != hashAlgorithm {
				return rpcstatus.Wrap(rpcstatus.Internal, errs.New("Hash algorithm in the first and last upload message are different %s %s", hashAlgorithm, message.Done.HashAlgorithm))
			}

			calculatedHash := pieceWriter.Hash()
			if err := endpoint.VerifyPieceHash(ctx, limit, message.Done, calculatedHash); err != nil {
				return rpcstatus.Wrap(rpcstatus.Internal, err)
			}
			if message.Done.PieceSize != pieceWriter.Size() {
				return rpcstatus.Errorf(rpcstatus.InvalidArgument,
					"Size of finished piece does not match size declared by uplink! %d != %d",
					message.Done.PieceSize, pieceWriter.Size())
			}

			{
				info := &pb.PieceHeader{
					Hash:          calculatedHash,
					HashAlgorithm: hashAlgorithm,
					CreationTime:  message.Done.Timestamp,
					Signature:     message.Done.GetSignature(),
					OrderLimit:    *limit,
				}
				if err := pieceWriter.Commit(ctx, info); err != nil {
					return rpcstatus.Wrap(rpcstatus.Internal, err)
				}
				committed = true
				if !limit.PieceExpiration.IsZero() {
					err := endpoint.store.SetExpiration(ctx, limit.SatelliteId, limit.PieceId, limit.PieceExpiration)
					if err != nil {
						return rpcstatus.Wrap(rpcstatus.Internal, err)
					}
				}
			}

			storageNodeHash, err := signing.SignPieceHash(ctx, endpoint.signer, &pb.PieceHash{
				PieceId:       limit.PieceId,
				Hash:          calculatedHash,
				HashAlgorithm: hashAlgorithm,
				PieceSize:     pieceWriter.Size(),
				Timestamp:     time.Now(),
			})
			if err != nil {
				return rpcstatus.Wrap(rpcstatus.Internal, err)
			}

			closeErr := rpctimeout.Run(ctx, endpoint.config.StreamOperationTimeout, func(_ context.Context) (err error) {
				return stream.SendAndClose(&pb.PieceUploadResponse{Done: storageNodeHash})
			})
			if errs.Is(closeErr, io.EOF) {
				closeErr = nil
			}
			if closeErr != nil {
				return rpcstatus.Wrap(rpcstatus.Internal, closeErr)
			}
			return nil
		}
	}
}

// isCongested identifies the state of congestion. If the total number of live
// connections rises above MinUploadSpeedCongestionThreshold (80% by default)
// of MaxConcurrentRequests, the node is considered congested.
func (endpoint *Endpoint) isCongested() bool {
	requestCongestionThreshold := int32(float64(endpoint.config.MaxConcurrentRequests) * endpoint.config.MinUploadSpeedCongestionThreshold)

	connectionCount := atomic.LoadInt32(&endpoint.liveRequests)
	return connectionCount > requestCongestionThreshold
}
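
// For example, with MaxConcurrentRequests=10 and
// MinUploadSpeedCongestionThreshold=0.8, isCongested reports true once more
// than 8 requests are in flight. With MaxConcurrentRequests=0 (unlimited) the
// threshold is 0, so any live request counts as congestion, which per the
// MinUploadSpeedCongestionThreshold help text above suppresses slow-upload
// flagging.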

// Download handles downloading a piece from the piece store.
func (endpoint *Endpoint) Download(stream pb.DRPCPiecestore_DownloadStream) (err error) {
	ctx := stream.Context()

	defer monLiveRequests(&ctx)(&err)
	defer mon.Task()(&ctx)(&err)

	atomic.AddInt32(&endpoint.liveRequests, 1)
	defer atomic.AddInt32(&endpoint.liveRequests, -1)

	startTime := time.Now().UTC()

	endpoint.pingStats.WasPinged(time.Now())

	// TODO: set maximum message size
	var message *pb.PieceDownloadRequest
	// N.B.: we are only allowed to use message if the returned error is nil. it would be
	// a race condition otherwise as Run does not wait for the closure to exit.
	err = rpctimeout.Run(ctx, endpoint.config.StreamOperationTimeout, func(_ context.Context) (err error) {
		message, err = stream.Recv()
		return err
	})
	if err != nil {
		return rpcstatus.Wrap(rpcstatus.Internal, err)
	}
	if message.Limit == nil || message.Chunk == nil {
		return rpcstatus.Error(rpcstatus.InvalidArgument, "expected order limit and chunk as the first message")
	}
	limit, chunk := message.Limit, message.Chunk
	if limit.Action != pb.PieceAction_GET && limit.Action != pb.PieceAction_GET_REPAIR && limit.Action != pb.PieceAction_GET_AUDIT {
		return rpcstatus.Errorf(rpcstatus.InvalidArgument,
			"expected get or get repair or audit action got %v", limit.Action)
	}
	if chunk.ChunkSize > limit.Limit {
		return rpcstatus.Errorf(rpcstatus.InvalidArgument,
			"requested more than order limit allows, limit=%v requested=%v", limit.Limit, chunk.ChunkSize)
	}

	actionSeriesTag := monkit.NewSeriesTag("action", limit.Action.String())

	remoteAddr := getRemoteAddr(ctx)
	endpoint.log.Info("download started",
		zap.Stringer("Piece ID", limit.PieceId),
		zap.Stringer("Satellite ID", limit.SatelliteId),
		zap.Stringer("Action", limit.Action),
		zap.Int64("Offset", chunk.Offset),
		zap.Int64("Size", chunk.ChunkSize),
		zap.String("Remote Address", remoteAddr))
	mon.Counter("download_started_count", actionSeriesTag).Inc(1)

	if err := endpoint.verifyOrderLimit(ctx, limit); err != nil {
		mon.Counter("download_failure_count", actionSeriesTag).Inc(1)
		mon.Meter("download_verify_orderlimit_failed", actionSeriesTag).Mark(1)
		endpoint.log.Error("download failed", zap.Stringer("Piece ID", limit.PieceId), zap.Stringer("Satellite ID", limit.SatelliteId), zap.Stringer("Action", limit.Action), zap.String("Remote Address", remoteAddr), zap.Error(err))
		return err
	}

	var pieceReader *pieces.Reader
	defer func() {
		endTime := time.Now().UTC()
		dt := endTime.Sub(startTime)
		downloadSize := int64(0)
		if pieceReader != nil {
			downloadSize = pieceReader.Size()
		}
		downloadRate := float64(0)
		if dt.Seconds() > 0 {
			downloadRate = float64(downloadSize) / dt.Seconds()
		}
		downloadDuration := dt.Nanoseconds()
		if errs2.IsCanceled(err) || drpc.ClosedError.Has(err) {
			mon.Counter("download_cancel_count", actionSeriesTag).Inc(1)
			mon.Meter("download_cancel_byte_meter", actionSeriesTag).Mark64(downloadSize)
			mon.IntVal("download_cancel_size_bytes", actionSeriesTag).Observe(downloadSize)
			mon.IntVal("download_cancel_duration_ns", actionSeriesTag).Observe(downloadDuration)
			mon.FloatVal("download_cancel_rate_bytes_per_sec", actionSeriesTag).Observe(downloadRate)
			endpoint.log.Info("download canceled", zap.Stringer("Piece ID", limit.PieceId), zap.Stringer("Satellite ID", limit.SatelliteId), zap.Stringer("Action", limit.Action), zap.Int64("Offset", chunk.Offset), zap.Int64("Size", downloadSize), zap.String("Remote Address", remoteAddr))
		} else if err != nil {
			mon.Counter("download_failure_count", actionSeriesTag).Inc(1)
			mon.Meter("download_failure_byte_meter", actionSeriesTag).Mark64(downloadSize)
			mon.IntVal("download_failure_size_bytes", actionSeriesTag).Observe(downloadSize)
			mon.IntVal("download_failure_duration_ns", actionSeriesTag).Observe(downloadDuration)
			mon.FloatVal("download_failure_rate_bytes_per_sec", actionSeriesTag).Observe(downloadRate)
			endpoint.log.Error("download failed", zap.Stringer("Piece ID", limit.PieceId), zap.Stringer("Satellite ID", limit.SatelliteId), zap.Stringer("Action", limit.Action), zap.Int64("Offset", chunk.Offset), zap.Int64("Size", downloadSize), zap.String("Remote Address", remoteAddr), zap.Error(err))
		} else {
			mon.Counter("download_success_count", actionSeriesTag).Inc(1)
			mon.Meter("download_success_byte_meter", actionSeriesTag).Mark64(downloadSize)
			mon.IntVal("download_success_size_bytes", actionSeriesTag).Observe(downloadSize)
			mon.IntVal("download_success_duration_ns", actionSeriesTag).Observe(downloadDuration)
			mon.FloatVal("download_success_rate_bytes_per_sec", actionSeriesTag).Observe(downloadRate)
			endpoint.log.Info("downloaded", zap.Stringer("Piece ID", limit.PieceId), zap.Stringer("Satellite ID", limit.SatelliteId), zap.Stringer("Action", limit.Action), zap.Int64("Offset", chunk.Offset), zap.Int64("Size", downloadSize), zap.String("Remote Address", remoteAddr))
		}
	}()

	pieceReader, err = endpoint.store.Reader(ctx, limit.SatelliteId, limit.PieceId)
	if err != nil {
		if os.IsNotExist(err) {
			endpoint.monitor.VerifyDirReadableLoop.TriggerWait()
			return rpcstatus.Wrap(rpcstatus.NotFound, err)
		}
		return rpcstatus.Wrap(rpcstatus.Internal, err)
	}
	defer func() {
		err := pieceReader.Close() // similar to how a transaction Rollback works
		if err != nil {
			if errs2.IsCanceled(err) {
				return
			}
			// no reason to report this error to the uplink
			endpoint.log.Error("failed to close piece reader", zap.Error(err))
		}
	}()

	// for repair traffic, send along the PieceHash and original OrderLimit for validation
	// before sending the piece itself
	if message.Limit.Action == pb.PieceAction_GET_REPAIR {
		pieceHash, orderLimit, err := endpoint.store.GetHashAndLimit(ctx, limit.SatelliteId, limit.PieceId, pieceReader)
		if err != nil {
			endpoint.log.Error("could not get hash and order limit", zap.Error(err))
			return rpcstatus.Wrap(rpcstatus.Internal, err)
		}

		err = rpctimeout.Run(ctx, endpoint.config.StreamOperationTimeout, func(_ context.Context) (err error) {
			return stream.Send(&pb.PieceDownloadResponse{Hash: &pieceHash, Limit: &orderLimit})
		})
		if err != nil {
			endpoint.log.Error("error sending hash and order limit", zap.Error(err))
			return rpcstatus.Wrap(rpcstatus.Internal, err)
		}
	}

	// TODO: verify chunk.Size behavior logic with regards to reading all
	if chunk.Offset+chunk.ChunkSize > pieceReader.Size() {
		return rpcstatus.Errorf(rpcstatus.InvalidArgument,
			"requested more data than available, requesting=%v available=%v",
			chunk.Offset+chunk.ChunkSize, pieceReader.Size())
	}
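
	// The sender goroutine below and the order-receiving loop are coupled by a
	// throttle: each verified order produces (order.Amount - largestOrder.Amount)
	// bytes of budget, and the sender consumes that budget before reading and
	// sending the next chunk, so data is only streamed up to the amount the
	// uplink has signed orders for.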
	throttle := sync2.NewThrottle()
	// TODO: see whether this can be implemented without a goroutine

	group, ctx := errgroup.WithContext(ctx)
	group.Go(func() (err error) {
		var maximumChunkSize = 1 * memory.MiB.Int64()

		currentOffset := chunk.Offset
		unsentAmount := chunk.ChunkSize
		for unsentAmount > 0 {
			tryToSend := min(unsentAmount, maximumChunkSize)

			// TODO: add timeout here
			chunkSize, err := throttle.ConsumeOrWait(tryToSend)
			if err != nil {
				// this can happen only because uplink decided to close the connection
				return nil //nolint: nilerr // We don't need to return an error when client cancels.
			}

			chunkData := make([]byte, chunkSize)
			_, err = pieceReader.Seek(currentOffset, io.SeekStart)
			if err != nil {
				endpoint.log.Error("error seeking on piecereader", zap.Error(err))
				return rpcstatus.Wrap(rpcstatus.Internal, err)
			}

			// ReadFull is required to ensure we are sending the right amount of data.
			_, err = io.ReadFull(pieceReader, chunkData)
			if err != nil {
				endpoint.log.Error("error reading from piecereader", zap.Error(err))
				return rpcstatus.Wrap(rpcstatus.Internal, err)
			}

			err = rpctimeout.Run(ctx, endpoint.config.StreamOperationTimeout, func(_ context.Context) (err error) {
				return stream.Send(&pb.PieceDownloadResponse{
					Chunk: &pb.PieceDownloadResponse_Chunk{
						Offset: currentOffset,
						Data:   chunkData,
					},
				})
			})
			if errs.Is(err, io.EOF) {
				// err is io.EOF when uplink asked for a piece, but decided not to retrieve it,
				// no need to propagate it
				return nil
			}
			if err != nil {
				return rpcstatus.Wrap(rpcstatus.Internal, err)
			}

			currentOffset += chunkSize
			unsentAmount -= chunkSize
		}
		return nil
	})

	recvErr := func() (err error) {
		commitOrderToStore, err := endpoint.beginSaveOrder(limit)
		if err != nil {
			return err
		}

		largestOrder := pb.Order{}
		defer commitOrderToStore(ctx, &largestOrder)

		// ensure that we always terminate the sending goroutine
		defer throttle.Fail(io.EOF)

		handleOrder := func(order *pb.Order) error {
			if err := endpoint.VerifyOrder(ctx, limit, order, largestOrder.Amount); err != nil {
				return err
			}
			chunkSize := order.Amount - largestOrder.Amount
			if err := throttle.Produce(chunkSize); err != nil {
				// shouldn't happen since only receiving side is calling Fail
				return rpcstatus.Wrap(rpcstatus.Internal, err)
			}
			largestOrder = *order
			return nil
		}

		if message.Order != nil {
			if err = handleOrder(message.Order); err != nil {
				return err
			}
		}

		for {
			// N.B.: we are only allowed to use message if the returned error is nil. it would be
			// a race condition otherwise as Run does not wait for the closure to exit.
			err = rpctimeout.Run(ctx, endpoint.config.StreamOperationTimeout, func(_ context.Context) (err error) {
				message, err = stream.Recv()
				return err
			})
			if errs.Is(err, io.EOF) {
				// err is io.EOF or canceled when uplink closed the connection, no need to return error
				return nil
			}
			if errs2.IsCanceled(err) {
				return nil
			}
			if err != nil {
				return rpcstatus.Wrap(rpcstatus.Internal, err)
			}

			if message == nil || message.Order == nil {
				return rpcstatus.Error(rpcstatus.InvalidArgument, "expected order as the message")
			}

			if err = handleOrder(message.Order); err != nil {
				return err
			}
		}
	}()

	// ensure we wait for sender to complete
	sendErr := group.Wait()
	return rpcstatus.Wrap(rpcstatus.Internal, errs.Combine(sendErr, recvErr))
}

// beginSaveOrder saves the order with all necessary information. It assumes
// the order limit has already been verified.
func (endpoint *Endpoint) beginSaveOrder(limit *pb.OrderLimit) (_commit func(ctx context.Context, order *pb.Order), err error) {
	defer mon.Task()(nil)(&err)
	commit, err := endpoint.ordersStore.BeginEnqueue(limit.SatelliteId, limit.OrderCreation)
	if err != nil {
		return nil, err
	}

	done := false
	return func(ctx context.Context, order *pb.Order) {
		if done {
			return
		}
		done = true

		if order == nil || order.Amount <= 0 {
			// free unsent orders file for sending without writing anything
			err = commit(nil)
			if err != nil {
				endpoint.log.Error("failed to unlock orders file", zap.Error(err))
			}
			return
		}

		err = commit(&ordersfile.Info{Limit: limit, Order: order})
		if err != nil {
endpoint . log . Error ( "failed to add order" , zap . Error ( err ) )
} else {
// We always want to save order to the database to be able to settle.
err = endpoint . usage . Add ( context2 . WithoutCancellation ( ctx ) , limit . SatelliteId , limit . Action , order . Amount , time . Now ( ) )
if err != nil {
endpoint . log . Error ( "failed to add bandwidth usage" , zap . Error ( err ) )
}
2019-03-18 10:55:06 +00:00
}
2020-08-12 20:01:43 +01:00
} , nil
2019-03-18 10:55:06 +00:00
}
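// Illustrative sketch (an assumption about the calling code, which is not
// shown here): the commit closure returned above is typically captured by the
// request handler and deferred, so the largest received order is persisted
// exactly once even on early return; a nil or zero-amount order only unlocks
// the unsent orders file. The enclosing function name and the largestOrder
// variable below are assumptions for illustration.
//
//	commitOrder, err := endpoint.beginSaveOrder(limit) // assumed function name
//	if err != nil {
//		return rpcstatus.Wrap(rpcstatus.Internal, err)
//	}
//	defer func() { commitOrder(ctx, largestOrder) }()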
2020-07-16 15:18:02 +01:00
// RestoreTrash restores all trashed items for the satellite issuing the call.
2019-11-20 16:28:49 +00:00
func ( endpoint * Endpoint ) RestoreTrash ( ctx context . Context , restoreTrashReq * pb . RestoreTrashRequest ) ( res * pb . RestoreTrashResponse , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
peer , err := identity . PeerIdentityFromContext ( ctx )
if err != nil {
2019-11-19 01:50:56 +00:00
return nil , rpcstatus . Wrap ( rpcstatus . Unauthenticated , err )
2019-11-20 16:28:49 +00:00
}
err = endpoint . trust . VerifySatelliteID ( ctx , peer . ID )
if err != nil {
2019-11-19 01:50:56 +00:00
return nil , rpcstatus . Error ( rpcstatus . PermissionDenied , "RestoreTrash called with untrusted ID" )
2019-11-20 16:28:49 +00:00
}
2022-12-16 16:14:17 +00:00
err = endpoint . trashChore . StartRestore ( ctx , peer . ID )
if err != nil {
return nil , rpcstatus . Error ( rpcstatus . Internal , "failed to start restore" )
}
2019-11-20 16:28:49 +00:00
return & pb . RestoreTrashResponse { } , nil
}
2020-07-16 15:18:02 +01:00
// Retain keeps only piece ids specified in the request.
2019-07-11 21:04:22 +01:00
func ( endpoint * Endpoint ) Retain ( ctx context . Context , retainReq * pb . RetainRequest ) ( res * pb . RetainResponse , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
2019-07-26 21:49:08 +01:00
// if retain status is disabled, quit immediately
2019-08-19 19:52:47 +01:00
if endpoint . retain . Status ( ) == retain . Disabled {
2019-07-26 21:49:08 +01:00
return & pb . RetainResponse { } , nil
}
2019-07-10 14:41:47 +01:00
peer , err := identity . PeerIdentityFromContext ( ctx )
if err != nil {
2019-11-19 01:50:56 +00:00
return nil , rpcstatus . Wrap ( rpcstatus . Unauthenticated , err )
2019-07-11 21:04:22 +01:00
}
err = endpoint . trust . VerifySatelliteID ( ctx , peer . ID )
if err != nil {
2019-11-19 01:50:56 +00:00
return nil , rpcstatus . Errorf ( rpcstatus . PermissionDenied , "retain called with untrusted ID" )
2019-07-10 14:41:47 +01:00
}
filter , err := bloomfilter . NewFromBytes ( retainReq . GetFilter ( ) )
if err != nil {
2019-11-19 01:50:56 +00:00
return nil , rpcstatus . Wrap ( rpcstatus . InvalidArgument , err )
2019-07-10 14:41:47 +01:00
}
2021-06-04 13:56:42 +01:00
filterHashCount , _ := filter . Parameters ( )
mon . IntVal ( "retain_filter_size" ) . Observe ( filter . Size ( ) )
mon . IntVal ( "retain_filter_hash_count" ) . Observe ( int64 ( filterHashCount ) )
mon . IntVal ( "retain_creation_date" ) . Observe ( retainReq . CreationDate . Unix ( ) )
2019-07-10 14:41:47 +01:00
2019-08-19 19:52:47 +01:00
// the queue function will update the created before time based on the configurable retain buffer
queued := endpoint . retain . Queue ( retain . Request {
SatelliteID : peer . ID ,
CreatedBefore : retainReq . GetCreationDate ( ) ,
Filter : filter ,
2019-08-08 02:47:30 +01:00
} )
2019-08-19 19:52:47 +01:00
if ! queued {
2019-11-05 21:04:07 +00:00
endpoint . log . Debug ( "Retain job not queued for satellite" , zap . Stringer ( "Satellite ID" , peer . ID ) )
2019-07-10 14:41:47 +01:00
}
2019-07-26 21:49:08 +01:00
2019-07-10 14:41:47 +01:00
return & pb . RetainResponse { } , nil
}
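// Illustrative sketch of how a caller could assemble a RetainRequest; the
// bloom filter constructor and the Add/Bytes methods used below are
// assumptions about the storj.io/common/bloomfilter API, not calls verified
// in this file.
//
//	filter := bloomfilter.NewOptimal(expectedPieceCount, 0.1) // assumed constructor
//	for _, pieceID := range livePieceIDs {
//		filter.Add(pieceID) // assumed method
//	}
//	req := &pb.RetainRequest{
//		CreationDate: filterCreationTime,
//		Filter:       filter.Bytes(), // assumed serialization
//	}
//	// Pieces created before CreationDate and absent from the filter become
//	// garbage-collection candidates on the node.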
2019-09-12 10:26:55 +01:00
// TestLiveRequestCount returns the current number of live requests.
func ( endpoint * Endpoint ) TestLiveRequestCount ( ) int32 {
2019-10-23 20:43:43 +01:00
return atomic . LoadInt32 ( & endpoint . liveRequests )
2019-09-12 10:26:55 +01:00
}
2020-07-16 15:18:02 +01:00
// min finds the min of two values.
2019-03-18 10:55:06 +00:00
func min ( a , b int64 ) int64 {
if a < b {
return a
}
return b
}
2021-07-01 07:59:04 +01:00
// speedEstimation monitors the state of incoming traffic. It signals a
// slow-speed client under non-congested traffic conditions.
type speedEstimation struct {
// grace indicates the period of time that must pass before the observer kicks in
grace time . Duration
// limit for flagging slow connections. Speed below this limit is considered to be slow.
limit memory . Size
// uncongestedTime indicates how long the connection has been measured in a non-congested state
uncongestedTime time . Duration
lastChecked time . Time
}
// EnsureLimit ensures that, under non-congested conditions, a slow-upload client gets flagged.
func ( estimate * speedEstimation ) EnsureLimit ( transferred memory . Size , congested bool , now time . Time ) error {
if estimate . lastChecked . IsZero ( ) {
estimate . lastChecked = now
return nil
}
delta := now . Sub ( estimate . lastChecked )
estimate . lastChecked = now
// In a congested condition the speed check would produce false positives,
// so it is skipped.
if congested {
return nil
}
estimate . uncongestedTime += delta
if estimate . uncongestedTime <= 0 || estimate . uncongestedTime <= estimate . grace {
// not enough data
return nil
}
bytesPerSec := float64 ( transferred ) / estimate . uncongestedTime . Seconds ( )
if bytesPerSec < float64 ( estimate . limit ) {
return errs . New ( "speed too low, current:%v < limit:%v" , bytesPerSec , estimate . limit )
}
return nil
}
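// Illustrative sketch (hypothetical caller, not taken from this file): an
// upload loop could feed EnsureLimit the running byte count after each chunk
// and abort the transfer once the estimator reports a sustained slow speed.
// The chunk source, isCongested helper, and the grace/limit values below are
// assumptions for illustration.
//
//	est := speedEstimation{grace: 10 * time.Second, limit: memory.KiB}
//	var transferred int64
//	for chunk := range chunks { // hypothetical chunk source
//		transferred += int64(len(chunk))
//		if err := est.EnsureLimit(memory.Size(transferred), isCongested(), time.Now()); err != nil {
//			return rpcstatus.Wrap(rpcstatus.Aborted, err)
//		}
//	}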
2023-01-23 13:30:44 +00:00
// getRemoteAddr returns the remote address from the request context.
func getRemoteAddr ( ctx context . Context ) string {
if transport , ok := drpcctx . Transport ( ctx ) ; ok {
if conn , ok := transport . ( net . Conn ) ; ok {
return conn . RemoteAddr ( ) . String ( )
}
}
return ""
}
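// Illustrative sketch (hypothetical log call, not from this file): request
// handlers can use getRemoteAddr to attach the peer's dialed address to
// their log entries.
//
//	remoteAddr := getRemoteAddr(ctx)
//	endpoint.log.Info("upload started", zap.String("Remote Address", remoteAddr))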