2019-03-18 10:55:06 +00:00
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package piecestore
import (
"context"
2020-04-15 20:32:22 +01:00
"fmt"
2019-03-18 10:55:06 +00:00
"io"
2019-06-03 10:17:09 +01:00
"os"
2019-07-03 14:47:55 +01:00
"sync/atomic"
2019-03-18 10:55:06 +00:00
"time"
2019-11-08 20:40:39 +00:00
"github.com/spacemonkeygo/monkit/v3"
2019-03-18 10:55:06 +00:00
"github.com/zeebo/errs"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
2019-12-27 11:48:47 +00:00
"storj.io/common/bloomfilter"
2020-03-20 13:01:12 +00:00
"storj.io/common/context2"
2019-12-27 11:48:47 +00:00
"storj.io/common/errs2"
"storj.io/common/identity"
"storj.io/common/memory"
"storj.io/common/pb"
"storj.io/common/rpc/rpcstatus"
2019-11-19 01:50:56 +00:00
"storj.io/common/rpc/rpctimeout"
2019-12-27 11:48:47 +00:00
"storj.io/common/signing"
"storj.io/common/storj"
"storj.io/common/sync2"
2019-03-18 10:55:06 +00:00
"storj.io/storj/storagenode/bandwidth"
"storj.io/storj/storagenode/monitor"
"storj.io/storj/storagenode/orders"
"storj.io/storj/storagenode/pieces"
2020-05-27 22:07:24 +01:00
"storj.io/storj/storagenode/piecestore/usedserials"
2019-08-19 19:52:47 +01:00
"storj.io/storj/storagenode/retain"
2019-03-18 10:55:06 +00:00
"storj.io/storj/storagenode/trust"
)
// mon is the package-level monkit scope used for metrics and task
// tracing throughout this package.
var (
	mon = monkit.Package()
)
2019-08-02 23:49:39 +01:00
2020-07-16 15:18:02 +01:00
// OldConfig contains everything necessary for a server.
//
// Field semantics are carried by the struct tags (`help`, `default`,
// `devDefault`/`releaseDefault`), which the config system reads.
type OldConfig struct {
	Path                   string         `help:"path to store data in" default:"$CONFDIR/storage"`
	WhitelistedSatellites  storj.NodeURLs `help:"a comma-separated list of approved satellite node urls (unused)" devDefault:"" releaseDefault:""`
	AllocatedDiskSpace     memory.Size    `user:"true" help:"total allocated disk space in bytes" default:"1TB"`
	AllocatedBandwidth     memory.Size    `user:"true" help:"total allocated bandwidth in bytes (deprecated)" default:"0B"`
	KBucketRefreshInterval time.Duration  `help:"how frequently Kademlia bucket should be refreshed with node stats" default:"1h0m0s"`
}
2019-03-18 10:55:06 +00:00
// Config defines parameters for piecestore endpoint.
//
// Most knobs are documented via their `help` tags; the nested Trust,
// Monitor and Orders configs belong to the corresponding subsystems.
type Config struct {
	DatabaseDir             string        `help:"directory to store databases. if empty, uses data path" default:""`
	ExpirationGracePeriod   time.Duration `help:"how soon before expiration date should things be considered expired" default:"48h0m0s"`
	MaxConcurrentRequests   int           `help:"how many concurrent requests are allowed, before uploads are rejected. 0 represents unlimited." default:"0"`
	DeleteWorkers           int           `help:"how many piece delete workers" default:"1"`
	DeleteQueueSize         int           `help:"size of the piece delete queue" default:"10000"`
	OrderLimitGracePeriod   time.Duration `help:"how long after OrderLimit creation date are OrderLimits no longer accepted" default:"1h0m0s"`
	CacheSyncInterval       time.Duration `help:"how often the space used cache is synced to persistent storage" releaseDefault:"1h0m0s" devDefault:"0h1m0s"`
	StreamOperationTimeout  time.Duration `help:"how long to spend waiting for a stream operation before canceling" default:"30m"`
	RetainTimeBuffer        time.Duration `help:"allows for small differences in the satellite and storagenode clocks" default:"48h0m0s"`
	ReportCapacityThreshold memory.Size   `help:"threshold below which to immediately notify satellite of capacity" default:"500MB" hidden:"true"`
	MaxUsedSerialsSize      memory.Size   `help:"amount of memory allowed for used serials store - once surpassed, serials will be dropped at random" default:"1MB"`

	Trust trust.Config

	Monitor monitor.Config
	Orders  orders.Config
}
2019-10-03 19:31:39 +01:00
// pingStatsSource is the minimal interface the endpoint uses to record
// that the node has been contacted by a peer.
type pingStatsSource interface {
	// WasPinged records that the node was contacted at the given time.
	WasPinged(when time.Time)
}
2019-09-10 14:24:16 +01:00
// Endpoint implements uploading, downloading and deleting for a storage node.
//
// architecture: Endpoint
type Endpoint struct {
	log    *zap.Logger
	config Config

	signer    signing.Signer   // signs piece hashes returned to the uplink
	trust     *trust.Pool      // verifies that requests come from trusted satellites
	monitor   *monitor.Service // reports available space and low-disk notifications
	retain    *retain.Service
	pingStats pingStatsSource // notified whenever a request arrives

	store        *pieces.Store     // piece content storage
	ordersStore  *orders.FileStore // file-backed order archive
	orders       orders.DB
	usage        bandwidth.DB
	usedSerials  *usedserials.Table
	pieceDeleter *pieces.Deleter // async queue for satellite-requested deletes

	// liveRequests counts in-flight requests; accessed only via sync/atomic.
	liveRequests int32
}
// NewEndpoint creates a new piecestore endpoint.
2020-07-01 23:05:01 +01:00
func NewEndpoint ( log * zap . Logger , signer signing . Signer , trust * trust . Pool , monitor * monitor . Service , retain * retain . Service , pingStats pingStatsSource , store * pieces . Store , pieceDeleter * pieces . Deleter , ordersStore * orders . FileStore , orders orders . DB , usage bandwidth . DB , usedSerials * usedserials . Table , config Config ) ( * Endpoint , error ) {
2019-03-18 10:55:06 +00:00
return & Endpoint {
2020-05-06 11:54:14 +01:00
log : log ,
config : config ,
2019-03-18 10:55:06 +00:00
2019-10-03 19:31:39 +01:00
signer : signer ,
trust : trust ,
monitor : monitor ,
retain : retain ,
pingStats : pingStats ,
2019-03-18 10:55:06 +00:00
2020-04-20 21:29:18 +01:00
store : store ,
2020-07-01 23:05:01 +01:00
ordersStore : ordersStore ,
2020-04-20 21:29:18 +01:00
orders : orders ,
usage : usage ,
usedSerials : usedSerials ,
pieceDeleter : pieceDeleter ,
2019-07-03 14:47:55 +01:00
2019-10-23 20:43:43 +01:00
liveRequests : 0 ,
2019-03-18 10:55:06 +00:00
} , nil
}
2019-08-02 23:49:39 +01:00
// monLiveRequests traces each request that is counted in
// Endpoint.liveRequests (uploads, downloads and deletes).
var monLiveRequests = mon.TaskNamed("live-request")
2019-11-26 17:47:19 +00:00
// Delete handles deleting a piece on piece store requested by uplink.
//
2019-12-18 15:33:12 +00:00
// DEPRECATED in favor of DeletePieces.
2019-03-18 10:55:06 +00:00
func ( endpoint * Endpoint ) Delete ( ctx context . Context , delete * pb . PieceDeleteRequest ) ( _ * pb . PieceDeleteResponse , err error ) {
2019-08-02 23:49:39 +01:00
defer monLiveRequests ( & ctx ) ( & err )
2019-03-18 10:55:06 +00:00
defer mon . Task ( ) ( & ctx ) ( & err )
2019-10-23 20:43:43 +01:00
atomic . AddInt32 ( & endpoint . liveRequests , 1 )
defer atomic . AddInt32 ( & endpoint . liveRequests , - 1 )
2019-07-03 14:47:55 +01:00
2019-10-03 19:31:39 +01:00
endpoint . pingStats . WasPinged ( time . Now ( ) )
2019-03-18 10:55:06 +00:00
if delete . Limit . Action != pb . PieceAction_DELETE {
2019-11-19 01:50:56 +00:00
return nil , rpcstatus . Errorf ( rpcstatus . InvalidArgument ,
"expected delete action got %v" , delete . Limit . Action )
2019-03-18 10:55:06 +00:00
}
2019-07-30 17:58:08 +01:00
if err := endpoint . verifyOrderLimit ( ctx , delete . Limit ) ; err != nil {
2019-11-19 01:50:56 +00:00
return nil , rpcstatus . Wrap ( rpcstatus . Unauthenticated , err )
2019-03-18 10:55:06 +00:00
}
2019-08-08 02:47:30 +01:00
if err := endpoint . store . Delete ( ctx , delete . Limit . SatelliteId , delete . Limit . PieceId ) ; err != nil {
2019-03-18 10:55:06 +00:00
// explicitly ignoring error because the errors
2019-12-02 11:18:20 +00:00
// TODO: https://storjlabs.atlassian.net/browse/V3-3222
// report rpc status of internal server error or not found error,
// e.g. not found might happen when we get a deletion request after garbage
// collection has deleted it
2019-11-05 21:04:07 +00:00
endpoint . log . Error ( "delete failed" , zap . Stringer ( "Satellite ID" , delete . Limit . SatelliteId ) , zap . Stringer ( "Piece ID" , delete . Limit . PieceId ) , zap . Error ( err ) )
2019-03-18 10:55:06 +00:00
} else {
2019-11-05 21:04:07 +00:00
endpoint . log . Info ( "deleted" , zap . Stringer ( "Satellite ID" , delete . Limit . SatelliteId ) , zap . Stringer ( "Piece ID" , delete . Limit . PieceId ) )
2019-03-18 10:55:06 +00:00
}
return & pb . PieceDeleteResponse { } , nil
}
2019-12-18 15:33:12 +00:00
// DeletePieces delete a list of pieces on satellite request.
func ( endpoint * Endpoint ) DeletePieces (
ctx context . Context , req * pb . DeletePiecesRequest ,
) ( _ * pb . DeletePiecesResponse , err error ) {
defer mon . Task ( ) ( & ctx , req . PieceIds ) ( & err )
peer , err := identity . PeerIdentityFromContext ( ctx )
if err != nil {
2019-11-19 01:50:56 +00:00
return nil , rpcstatus . Wrap ( rpcstatus . Unauthenticated , err )
2019-12-18 15:33:12 +00:00
}
err = endpoint . trust . VerifySatelliteID ( ctx , peer . ID )
if err != nil {
2019-11-19 01:50:56 +00:00
return nil , rpcstatus . Error ( rpcstatus . PermissionDenied , "delete pieces called with untrusted ID" )
2019-12-18 15:33:12 +00:00
}
2020-04-24 00:09:15 +01:00
unhandled := endpoint . pieceDeleter . Enqueue ( ctx , peer . ID , req . PieceIds )
return & pb . DeletePiecesResponse {
UnhandledCount : int64 ( unhandled ) ,
} , nil
2019-12-18 15:33:12 +00:00
}
2019-09-12 22:09:46 +01:00
// Upload handles uploading a piece on piece store.
//
// The stream protocol is: the first message must carry an OrderLimit;
// subsequent messages may carry an Order (bandwidth allocation), a Chunk
// (piece data), and finally Done (the uplink's piece hash), which commits
// the piece and closes the stream with the node-signed hash.
func (endpoint *Endpoint) Upload(stream pb.DRPCPiecestore_UploadStream) (err error) {
	ctx := stream.Context()
	defer monLiveRequests(&ctx)(&err)
	defer mon.Task()(&ctx)(&err)

	// count this upload against the concurrent-request limit
	liveRequests := atomic.AddInt32(&endpoint.liveRequests, 1)
	defer atomic.AddInt32(&endpoint.liveRequests, -1)

	endpoint.pingStats.WasPinged(time.Now())

	// reject early when the node is overloaded (0 means unlimited)
	if endpoint.config.MaxConcurrentRequests > 0 && int(liveRequests) > endpoint.config.MaxConcurrentRequests {
		endpoint.log.Error("upload rejected, too many requests",
			zap.Int32("live requests", liveRequests),
			zap.Int("requestLimit", endpoint.config.MaxConcurrentRequests),
		)
		errMsg := fmt.Sprintf("storage node overloaded, request limit: %d", endpoint.config.MaxConcurrentRequests)
		return rpcstatus.Error(rpcstatus.Unavailable, errMsg)
	}

	startTime := time.Now().UTC()

	// TODO: set maximum message size

	// N.B.: we are only allowed to use message if the returned error is nil. it would be
	// a race condition otherwise as Run does not wait for the closure to exit.
	var message *pb.PieceUploadRequest
	err = rpctimeout.Run(ctx, endpoint.config.StreamOperationTimeout, func(_ context.Context) (err error) {
		message, err = stream.Recv()
		return err
	})
	switch {
	case err != nil:
		return rpcstatus.Wrap(rpcstatus.Internal, err)
	case message == nil:
		return rpcstatus.Error(rpcstatus.InvalidArgument, "expected a message")
	case message.Limit == nil:
		return rpcstatus.Error(rpcstatus.InvalidArgument, "expected order limit as the first message")
	}
	limit := message.Limit

	if limit.Action != pb.PieceAction_PUT && limit.Action != pb.PieceAction_PUT_REPAIR {
		return rpcstatus.Errorf(rpcstatus.InvalidArgument, "expected put or put repair action got %v", limit.Action)
	}

	if err := endpoint.verifyOrderLimit(ctx, limit); err != nil {
		return err
	}

	availableSpace, err := endpoint.monitor.AvailableSpace(ctx)
	if err != nil {
		return rpcstatus.Wrap(rpcstatus.Internal, err)
	}

	// if availableSpace has fallen below ReportCapacityThreshold, report capacity to satellites
	defer func() {
		if availableSpace < endpoint.config.ReportCapacityThreshold.Int64() {
			endpoint.monitor.NotifyLowDisk()
		}
	}()

	if availableSpace < limit.Limit {
		return rpcstatus.Errorf(rpcstatus.Aborted, "not enough available disk space, have: %v, need: %v", availableSpace, limit.Limit)
	}

	var pieceWriter *pieces.Writer
	// committed is set to true when the piece is committed.
	// It is used to distinguish successful pieces where the uplink cancels the connections,
	// and pieces that were actually canceled before being completed.
	var committed bool

	// record per-outcome metrics (failure / cancel / success) when the
	// call completes; reads the named return err, so must run after the
	// handler body decides the result.
	defer func() {
		endTime := time.Now().UTC()
		dt := endTime.Sub(startTime)
		uploadSize := int64(0)
		if pieceWriter != nil {
			uploadSize = pieceWriter.Size()
		}
		uploadRate := float64(0)
		if dt.Seconds() > 0 {
			uploadRate = float64(uploadSize) / dt.Seconds()
		}
		uploadDuration := dt.Nanoseconds()

		if err != nil && !errs2.IsCanceled(err) {
			mon.Meter("upload_failure_byte_meter").Mark64(uploadSize)
			mon.IntVal("upload_failure_size_bytes").Observe(uploadSize)
			mon.IntVal("upload_failure_duration_ns").Observe(uploadDuration)
			mon.FloatVal("upload_failure_rate_bytes_per_sec").Observe(uploadRate)
			endpoint.log.Error("upload failed", zap.Stringer("Piece ID", limit.PieceId), zap.Stringer("Satellite ID", limit.SatelliteId), zap.Stringer("Action", limit.Action), zap.Error(err))
		} else if errs2.IsCanceled(err) && !committed {
			mon.Meter("upload_cancel_byte_meter").Mark64(uploadSize)
			mon.IntVal("upload_cancel_size_bytes").Observe(uploadSize)
			mon.IntVal("upload_cancel_duration_ns").Observe(uploadDuration)
			mon.FloatVal("upload_cancel_rate_bytes_per_sec").Observe(uploadRate)
			endpoint.log.Info("upload canceled", zap.Stringer("Piece ID", limit.PieceId), zap.Stringer("Satellite ID", limit.SatelliteId), zap.Stringer("Action", limit.Action))
		} else {
			// success, including cancellations that raced with a completed commit
			mon.Meter("upload_success_byte_meter").Mark64(uploadSize)
			mon.IntVal("upload_success_size_bytes").Observe(uploadSize)
			mon.IntVal("upload_success_duration_ns").Observe(uploadDuration)
			mon.FloatVal("upload_success_rate_bytes_per_sec").Observe(uploadRate)
			endpoint.log.Info("uploaded", zap.Stringer("Piece ID", limit.PieceId), zap.Stringer("Satellite ID", limit.SatelliteId), zap.Stringer("Action", limit.Action))
		}
	}()

	endpoint.log.Info("upload started",
		zap.Stringer("Piece ID", limit.PieceId),
		zap.Stringer("Satellite ID", limit.SatelliteId),
		zap.Stringer("Action", limit.Action),
		zap.Int64("Available Space", availableSpace))

	pieceWriter, err = endpoint.store.Writer(ctx, limit.SatelliteId, limit.PieceId)
	if err != nil {
		return rpcstatus.Wrap(rpcstatus.Internal, err)
	}
	defer func() {
		// cancel error if it hasn't been committed
		if cancelErr := pieceWriter.Cancel(ctx); cancelErr != nil {
			if errs2.IsCanceled(cancelErr) {
				return
			}
			endpoint.log.Error("error during canceling a piece write", zap.Error(cancelErr))
		}
	}()

	orderSaved := false
	largestOrder := pb.Order{}
	// Ensure that the order is saved even in the face of an error. In the
	// success path, the order will be saved just before sending the response
	// and closing the stream (in which case, orderSaved will be true).
	defer func() {
		if !orderSaved {
			endpoint.saveOrder(ctx, limit, &largestOrder)
		}
	}()

	for {
		// TODO: reuse messages to avoid allocations

		// N.B.: we are only allowed to use message if the returned error is nil. it would be
		// a race condition otherwise as Run does not wait for the closure to exit.
		err = rpctimeout.Run(ctx, endpoint.config.StreamOperationTimeout, func(_ context.Context) (err error) {
			message, err = stream.Recv()
			return err
		})
		if errs.Is(err, io.EOF) {
			// the uplink must send Done before closing; a bare EOF is a protocol error
			return rpcstatus.Error(rpcstatus.InvalidArgument, "unexpected EOF")
		} else if err != nil {
			return rpcstatus.Wrap(rpcstatus.Internal, err)
		}

		if message == nil {
			return rpcstatus.Error(rpcstatus.InvalidArgument, "expected a message")
		}
		if message.Order == nil && message.Chunk == nil && message.Done == nil {
			return rpcstatus.Error(rpcstatus.InvalidArgument, "expected a message")
		}

		// a single message may combine Order, Chunk and Done fields;
		// each is handled independently below.
		if message.Order != nil {
			if err := endpoint.VerifyOrder(ctx, limit, message.Order, largestOrder.Amount); err != nil {
				return err
			}
			largestOrder = *message.Order
		}

		if message.Chunk != nil {
			// chunks must arrive strictly in order, appended at the current size
			if message.Chunk.Offset != pieceWriter.Size() {
				return rpcstatus.Error(rpcstatus.InvalidArgument, "chunk out of order")
			}

			chunkSize := int64(len(message.Chunk.Data))
			// the uplink must have allocated bandwidth (via orders) before writing
			if largestOrder.Amount < pieceWriter.Size()+chunkSize {
				// TODO: should we write currently and give a chance for uplink to remedy the situation?
				return rpcstatus.Errorf(rpcstatus.InvalidArgument,
					"not enough allocated, allocated=%v writing=%v",
					largestOrder.Amount, pieceWriter.Size()+int64(len(message.Chunk.Data)))
			}

			availableSpace -= chunkSize
			if availableSpace < 0 {
				return rpcstatus.Error(rpcstatus.Internal, "out of space")
			}

			if _, err := pieceWriter.Write(message.Chunk.Data); err != nil {
				return rpcstatus.Wrap(rpcstatus.Internal, err)
			}
		}

		if message.Done != nil {
			// verify the uplink's hash against what we actually received
			calculatedHash := pieceWriter.Hash()
			if err := endpoint.VerifyPieceHash(ctx, limit, message.Done, calculatedHash); err != nil {
				return rpcstatus.Wrap(rpcstatus.Internal, err)
			}
			if message.Done.PieceSize != pieceWriter.Size() {
				return rpcstatus.Errorf(rpcstatus.InvalidArgument,
					"Size of finished piece does not match size declared by uplink! %d != %d",
					message.Done.PieceSize, pieceWriter.Size())
			}

			{
				// commit the piece with its header so it becomes durable
				info := &pb.PieceHeader{
					Hash:         calculatedHash,
					CreationTime: message.Done.Timestamp,
					Signature:    message.Done.GetSignature(),
					OrderLimit:   *limit,
				}
				if err := pieceWriter.Commit(ctx, info); err != nil {
					return rpcstatus.Wrap(rpcstatus.Internal, err)
				}
				committed = true
				if !limit.PieceExpiration.IsZero() {
					err := endpoint.store.SetExpiration(ctx, limit.SatelliteId, limit.PieceId, limit.PieceExpiration)
					if err != nil {
						return rpcstatus.Wrap(rpcstatus.Internal, err)
					}
				}
			}

			// sign our view of the piece so the uplink can prove storage to the satellite
			storageNodeHash, err := signing.SignPieceHash(ctx, endpoint.signer, &pb.PieceHash{
				PieceId:   limit.PieceId,
				Hash:      calculatedHash,
				PieceSize: pieceWriter.Size(),
				Timestamp: time.Now(),
			})
			if err != nil {
				return rpcstatus.Wrap(rpcstatus.Internal, err)
			}

			// Save the order before completing the call. Set orderSaved so
			// that the defer above does not also save.
			orderSaved = true
			endpoint.saveOrder(ctx, limit, &largestOrder)

			closeErr := rpctimeout.Run(ctx, endpoint.config.StreamOperationTimeout, func(_ context.Context) (err error) {
				return stream.SendAndClose(&pb.PieceUploadResponse{Done: storageNodeHash})
			})
			if errs.Is(closeErr, io.EOF) {
				// uplink already closed its side; the piece is committed, so this is success
				closeErr = nil
			}
			if closeErr != nil {
				return rpcstatus.Wrap(rpcstatus.Internal, closeErr)
			}
			return nil
		}
	}
}
2020-05-11 06:20:34 +01:00
// Download handles downloading a piece on piecestore.
//
// The first message must carry an OrderLimit and a Chunk describing the
// requested byte range. A sender goroutine streams piece data, throttled
// by the bandwidth amounts in the Orders that the receive loop reads from
// the uplink.
func (endpoint *Endpoint) Download(stream pb.DRPCPiecestore_DownloadStream) (err error) {
	ctx := stream.Context()
	defer monLiveRequests(&ctx)(&err)
	defer mon.Task()(&ctx)(&err)

	// count this download against the live request counter
	atomic.AddInt32(&endpoint.liveRequests, 1)
	defer atomic.AddInt32(&endpoint.liveRequests, -1)

	startTime := time.Now().UTC()

	endpoint.pingStats.WasPinged(time.Now())

	// TODO: set maximum message size
	var message *pb.PieceDownloadRequest
	// N.B.: we are only allowed to use message if the returned error is nil. it would be
	// a race condition otherwise as Run does not wait for the closure to exit.
	err = rpctimeout.Run(ctx, endpoint.config.StreamOperationTimeout, func(_ context.Context) (err error) {
		message, err = stream.Recv()
		return err
	})
	if err != nil {
		return rpcstatus.Wrap(rpcstatus.Internal, err)
	}
	if message.Limit == nil || message.Chunk == nil {
		return rpcstatus.Error(rpcstatus.InvalidArgument, "expected order limit and chunk as the first message")
	}
	limit, chunk := message.Limit, message.Chunk

	// only read-style actions may download
	if limit.Action != pb.PieceAction_GET && limit.Action != pb.PieceAction_GET_REPAIR && limit.Action != pb.PieceAction_GET_AUDIT {
		return rpcstatus.Errorf(rpcstatus.InvalidArgument,
			"expected get or get repair or audit action got %v", limit.Action)
	}
	if chunk.ChunkSize > limit.Limit {
		return rpcstatus.Errorf(rpcstatus.InvalidArgument,
			"requested more that order limit allows, limit=%v requested=%v", limit.Limit, chunk.ChunkSize)
	}

	endpoint.log.Info("download started", zap.Stringer("Piece ID", limit.PieceId), zap.Stringer("Satellite ID", limit.SatelliteId), zap.Stringer("Action", limit.Action))

	if err := endpoint.verifyOrderLimit(ctx, limit); err != nil {
		mon.Meter("download_verify_orderlimit_failed").Mark(1)
		endpoint.log.Error("download failed", zap.Stringer("Piece ID", limit.PieceId), zap.Stringer("Satellite ID", limit.SatelliteId), zap.Stringer("Action", limit.Action), zap.Error(err))
		return err
	}

	var pieceReader *pieces.Reader

	// record per-outcome metrics (cancel / failure / success) when the
	// call completes; reads the named return err.
	defer func() {
		endTime := time.Now().UTC()
		dt := endTime.Sub(startTime)
		downloadSize := int64(0)
		if pieceReader != nil {
			downloadSize = pieceReader.Size()
		}
		downloadRate := float64(0)
		if dt.Seconds() > 0 {
			downloadRate = float64(downloadSize) / dt.Seconds()
		}
		downloadDuration := dt.Nanoseconds()
		if errs2.IsCanceled(err) {
			mon.Meter("download_cancel_byte_meter").Mark64(downloadSize)
			mon.IntVal("download_cancel_size_bytes").Observe(downloadSize)
			mon.IntVal("download_cancel_duration_ns").Observe(downloadDuration)
			mon.FloatVal("download_cancel_rate_bytes_per_sec").Observe(downloadRate)
			endpoint.log.Info("download canceled", zap.Stringer("Piece ID", limit.PieceId), zap.Stringer("Satellite ID", limit.SatelliteId), zap.Stringer("Action", limit.Action))
		} else if err != nil {
			mon.Meter("download_failure_byte_meter").Mark64(downloadSize)
			mon.IntVal("download_failure_size_bytes").Observe(downloadSize)
			mon.IntVal("download_failure_duration_ns").Observe(downloadDuration)
			mon.FloatVal("download_failure_rate_bytes_per_sec").Observe(downloadRate)
			endpoint.log.Error("download failed", zap.Stringer("Piece ID", limit.PieceId), zap.Stringer("Satellite ID", limit.SatelliteId), zap.Stringer("Action", limit.Action), zap.Error(err))
		} else {
			mon.Meter("download_success_byte_meter").Mark64(downloadSize)
			mon.IntVal("download_success_size_bytes").Observe(downloadSize)
			mon.IntVal("download_success_duration_ns").Observe(downloadDuration)
			mon.FloatVal("download_success_rate_bytes_per_sec").Observe(downloadRate)
			endpoint.log.Info("downloaded", zap.Stringer("Piece ID", limit.PieceId), zap.Stringer("Satellite ID", limit.SatelliteId), zap.Stringer("Action", limit.Action))
		}
	}()

	pieceReader, err = endpoint.store.Reader(ctx, limit.SatelliteId, limit.PieceId)
	if err != nil {
		if os.IsNotExist(err) {
			return rpcstatus.Wrap(rpcstatus.NotFound, err)
		}
		return rpcstatus.Wrap(rpcstatus.Internal, err)
	}
	defer func() {
		err := pieceReader.Close() // similarly how transaction Rollback works
		if err != nil {
			if errs2.IsCanceled(err) {
				return
			}
			// no reason to report this error to the uplink
			endpoint.log.Error("failed to close piece reader", zap.Error(err))
		}
	}()

	// for repair traffic, send along the PieceHash and original OrderLimit for validation
	// before sending the piece itself
	if message.Limit.Action == pb.PieceAction_GET_REPAIR {
		pieceHash, orderLimit, err := endpoint.store.GetHashAndLimit(ctx, limit.SatelliteId, limit.PieceId, pieceReader)
		if err != nil {
			endpoint.log.Error("could not get hash and order limit", zap.Error(err))
			return rpcstatus.Wrap(rpcstatus.Internal, err)
		}

		err = rpctimeout.Run(ctx, endpoint.config.StreamOperationTimeout, func(_ context.Context) (err error) {
			return stream.Send(&pb.PieceDownloadResponse{Hash: &pieceHash, Limit: &orderLimit})
		})
		if err != nil {
			endpoint.log.Error("error sending hash and order limit", zap.Error(err))
			return rpcstatus.Wrap(rpcstatus.Internal, err)
		}
	}

	// TODO: verify chunk.Size behavior logic with regards to reading all
	if chunk.Offset+chunk.ChunkSize > pieceReader.Size() {
		return rpcstatus.Errorf(rpcstatus.InvalidArgument,
			"requested more data than available, requesting=%v available=%v",
			chunk.Offset+chunk.ChunkSize, pieceReader.Size())
	}

	// throttle couples the sender goroutine (consumer) with the order
	// receive loop (producer): data is only sent up to the bandwidth
	// amount granted by the orders received so far.
	throttle := sync2.NewThrottle()
	// TODO: see whether this can be implemented without a goroutine
	group, ctx := errgroup.WithContext(ctx)
	group.Go(func() (err error) {
		var maximumChunkSize = 1 * memory.MiB.Int64()
		currentOffset := chunk.Offset
		unsentAmount := chunk.ChunkSize
		for unsentAmount > 0 {
			tryToSend := min(unsentAmount, maximumChunkSize)

			// TODO: add timeout here
			chunkSize, err := throttle.ConsumeOrWait(tryToSend)
			if err != nil {
				// this can happen only because uplink decided to close the connection
				return nil
			}

			chunkData := make([]byte, chunkSize)
			_, err = pieceReader.Seek(currentOffset, io.SeekStart)
			if err != nil {
				endpoint.log.Error("error seeking on piecereader", zap.Error(err))
				return rpcstatus.Wrap(rpcstatus.Internal, err)
			}

			// ReadFull is required to ensure we are sending the right amount of data.
			_, err = io.ReadFull(pieceReader, chunkData)
			if err != nil {
				endpoint.log.Error("error reading from piecereader", zap.Error(err))
				return rpcstatus.Wrap(rpcstatus.Internal, err)
			}

			err = rpctimeout.Run(ctx, endpoint.config.StreamOperationTimeout, func(_ context.Context) (err error) {
				return stream.Send(&pb.PieceDownloadResponse{
					Chunk: &pb.PieceDownloadResponse_Chunk{
						Offset: currentOffset,
						Data:   chunkData,
					},
				})
			})
			if errs.Is(err, io.EOF) {
				// err is io.EOF when uplink asked for a piece, but decided not to retrieve it,
				// no need to propagate it
				return nil
			}
			if err != nil {
				return rpcstatus.Wrap(rpcstatus.Internal, err)
			}
			currentOffset += chunkSize
			unsentAmount -= chunkSize
		}
		return nil
	})

	// receive loop: read bandwidth orders from the uplink and feed the
	// throttle, keeping track of the largest order for settlement.
	recvErr := func() (err error) {
		largestOrder := pb.Order{}
		defer endpoint.saveOrder(ctx, limit, &largestOrder)

		// ensure that we always terminate sending goroutine
		defer throttle.Fail(io.EOF)
		for {
			// N.B.: we are only allowed to use message if the returned error is nil. it would be
			// a race condition otherwise as Run does not wait for the closure to exit.
			err = rpctimeout.Run(ctx, endpoint.config.StreamOperationTimeout, func(_ context.Context) (err error) {
				message, err = stream.Recv()
				return err
			})
			if errs.Is(err, io.EOF) {
				// err is io.EOF or canceled when uplink closed the connection, no need to return error
				return nil
			}
			if errs2.IsCanceled(err) {
				return nil
			}
			if err != nil {
				return rpcstatus.Wrap(rpcstatus.Internal, err)
			}
			if message == nil || message.Order == nil {
				return rpcstatus.Error(rpcstatus.InvalidArgument, "expected order as the message")
			}

			if err := endpoint.VerifyOrder(ctx, limit, message.Order, largestOrder.Amount); err != nil {
				return err
			}

			// allow the sender to transmit the newly granted amount
			chunkSize := message.Order.Amount - largestOrder.Amount
			if err := throttle.Produce(chunkSize); err != nil {
				// shouldn't happen since only receiving side is calling Fail
				return rpcstatus.Wrap(rpcstatus.Internal, err)
			}
			largestOrder = *message.Order
		}
	}()

	// ensure we wait for sender to complete
	sendErr := group.Wait()
	return rpcstatus.Wrap(rpcstatus.Internal, errs.Combine(sendErr, recvErr))
}
2019-07-31 15:13:39 +01:00
// saveOrder saves the order with all necessary information. It assumes it has been already verified.
func ( endpoint * Endpoint ) saveOrder ( ctx context . Context , limit * pb . OrderLimit , order * pb . Order ) {
2020-01-15 08:14:10 +00:00
// We always want to save order to the database to be able to settle.
ctx = context2 . WithoutCancellation ( ctx )
2020-01-14 11:07:35 +00:00
2019-06-04 13:31:39 +01:00
var err error
defer mon . Task ( ) ( & ctx ) ( & err )
2019-03-18 10:55:06 +00:00
// TODO: do this in a goroutine
if order == nil || order . Amount <= 0 {
return
}
2020-07-01 23:05:01 +01:00
err = endpoint . ordersStore . Enqueue ( & orders . Info {
2019-07-09 22:33:45 +01:00
Limit : limit ,
Order : order ,
2019-03-18 10:55:06 +00:00
} )
if err != nil {
endpoint . log . Error ( "failed to add order" , zap . Error ( err ) )
} else {
2020-01-15 08:14:10 +00:00
err = endpoint . usage . Add ( ctx , limit . SatelliteId , limit . Action , order . Amount , time . Now ( ) )
2019-03-18 10:55:06 +00:00
if err != nil {
endpoint . log . Error ( "failed to add bandwidth usage" , zap . Error ( err ) )
}
}
}
2020-07-16 15:18:02 +01:00
// RestoreTrash restores all trashed items for the satellite issuing the call.
2019-11-20 16:28:49 +00:00
func ( endpoint * Endpoint ) RestoreTrash ( ctx context . Context , restoreTrashReq * pb . RestoreTrashRequest ) ( res * pb . RestoreTrashResponse , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
peer , err := identity . PeerIdentityFromContext ( ctx )
if err != nil {
2019-11-19 01:50:56 +00:00
return nil , rpcstatus . Wrap ( rpcstatus . Unauthenticated , err )
2019-11-20 16:28:49 +00:00
}
err = endpoint . trust . VerifySatelliteID ( ctx , peer . ID )
if err != nil {
2019-11-19 01:50:56 +00:00
return nil , rpcstatus . Error ( rpcstatus . PermissionDenied , "RestoreTrash called with untrusted ID" )
2019-11-20 16:28:49 +00:00
}
err = endpoint . store . RestoreTrash ( ctx , peer . ID )
if err != nil {
2019-11-19 01:50:56 +00:00
return nil , rpcstatus . Wrap ( rpcstatus . Internal , err )
2019-11-20 16:28:49 +00:00
}
return & pb . RestoreTrashResponse { } , nil
}
2020-07-16 15:18:02 +01:00
// Retain keeps only piece ids specified in the request.
2019-07-11 21:04:22 +01:00
func ( endpoint * Endpoint ) Retain ( ctx context . Context , retainReq * pb . RetainRequest ) ( res * pb . RetainResponse , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
2019-07-26 21:49:08 +01:00
// if retain status is disabled, quit immediately
2019-08-19 19:52:47 +01:00
if endpoint . retain . Status ( ) == retain . Disabled {
2019-07-26 21:49:08 +01:00
return & pb . RetainResponse { } , nil
}
2019-07-10 14:41:47 +01:00
peer , err := identity . PeerIdentityFromContext ( ctx )
if err != nil {
2019-11-19 01:50:56 +00:00
return nil , rpcstatus . Wrap ( rpcstatus . Unauthenticated , err )
2019-07-11 21:04:22 +01:00
}
err = endpoint . trust . VerifySatelliteID ( ctx , peer . ID )
if err != nil {
2019-11-19 01:50:56 +00:00
return nil , rpcstatus . Errorf ( rpcstatus . PermissionDenied , "retain called with untrusted ID" )
2019-07-10 14:41:47 +01:00
}
filter , err := bloomfilter . NewFromBytes ( retainReq . GetFilter ( ) )
if err != nil {
2019-11-19 01:50:56 +00:00
return nil , rpcstatus . Wrap ( rpcstatus . InvalidArgument , err )
2019-07-10 14:41:47 +01:00
}
2019-08-19 19:52:47 +01:00
// the queue function will update the created before time based on the configurable retain buffer
queued := endpoint . retain . Queue ( retain . Request {
SatelliteID : peer . ID ,
CreatedBefore : retainReq . GetCreationDate ( ) ,
Filter : filter ,
2019-08-08 02:47:30 +01:00
} )
2019-08-19 19:52:47 +01:00
if ! queued {
2019-11-05 21:04:07 +00:00
endpoint . log . Debug ( "Retain job not queued for satellite" , zap . Stringer ( "Satellite ID" , peer . ID ) )
2019-07-10 14:41:47 +01:00
}
2019-07-26 21:49:08 +01:00
2019-07-10 14:41:47 +01:00
return & pb . RetainResponse { } , nil
}
2019-09-12 10:26:55 +01:00
// TestLiveRequestCount returns the current number of live requests.
func ( endpoint * Endpoint ) TestLiveRequestCount ( ) int32 {
2019-10-23 20:43:43 +01:00
return atomic . LoadInt32 ( & endpoint . liveRequests )
2019-09-12 10:26:55 +01:00
}
2020-07-16 15:18:02 +01:00
// min returns the smaller of the two int64 values.
func min(a, b int64) int64 {
	if b < a {
		return b
	}
	return a
}