2019-03-28 20:09:23 +00:00
|
|
|
// Copyright (C) 2019 Storj Labs, Inc.
|
|
|
|
// See LICENSE for copying information.
|
|
|
|
|
|
|
|
package orders
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2019-07-11 23:44:47 +01:00
|
|
|
"math"
|
2020-05-14 16:45:35 +01:00
|
|
|
mathrand "math/rand"
|
2020-01-27 20:01:37 +00:00
|
|
|
"sync"
|
2019-03-28 20:09:23 +00:00
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/zeebo/errs"
|
|
|
|
"go.uber.org/zap"
|
|
|
|
|
2019-12-27 11:48:47 +00:00
|
|
|
"storj.io/common/pb"
|
|
|
|
"storj.io/common/signing"
|
|
|
|
"storj.io/common/storj"
|
2020-03-30 10:08:50 +01:00
|
|
|
"storj.io/common/uuid"
|
2020-08-28 12:56:09 +01:00
|
|
|
"storj.io/storj/satellite/metainfo/metabase"
|
2019-07-28 06:55:36 +01:00
|
|
|
"storj.io/storj/satellite/overlay"
|
2020-02-21 14:07:29 +00:00
|
|
|
"storj.io/uplink/private/eestream"
|
2019-03-28 20:09:23 +00:00
|
|
|
)
|
|
|
|
|
2020-11-18 21:39:13 +00:00
|
|
|
var (
|
|
|
|
// ErrDownloadFailedNotEnoughPieces is returned when download failed due to missing pieces.
|
|
|
|
ErrDownloadFailedNotEnoughPieces = errs.Class("not enough pieces for download")
|
|
|
|
// ErrDecryptOrderMetadata is returned when a step of decrypting metadata fails.
|
|
|
|
ErrDecryptOrderMetadata = errs.Class("decrytping order metadata")
|
|
|
|
)
|
2019-12-04 21:24:36 +00:00
|
|
|
|
2019-06-21 11:38:40 +01:00
|
|
|
// Config is a configuration struct for orders Service.
|
|
|
|
type Config struct {
|
2020-08-27 15:30:04 +01:00
|
|
|
EncryptionKeys EncryptionKeys `help:"encryption keys to encrypt info in orders" default:""`
|
2020-07-24 18:13:15 +01:00
|
|
|
IncludeEncryptedMetadata bool `help:"include encrypted metadata in the order limit" default:"false"`
|
satellite/orders: 3-phase rollout
This adds a config flag orders.window-endpoint-rollout-phase
that can take on the values phase1, phase2 or phase3.
In phase1, the current orders endpoint continues to work as
usual, and the windowed orders endpoint uses the same backend
as the current one (but also does a bit extra).
In phase2, the current orders endpoint is disabled and the
windowed orders endpoint continues to use the same backend.
In phase3, the current orders endpoint is still disabled and
the windowed orders endpoint uses the new backend that requires
much less database traffic and state.
The intention is to deploy in phase1, roll out code to nodes
to have them use the windowed endpoint, switch to phase2, wait
a couple days for all existing orders to expire, then switch
to phase3.
Additionally, it fixes a bug where a node could submit a bunch
of orders and rack up charges for a bucket.
Change-Id: Ifdc10e09ae1645159cbec7ace687dcb2d594c76d
2020-07-21 17:53:32 +01:00
|
|
|
Expiration time.Duration `help:"how long until an order expires" default:"48h"` // 2 days
|
|
|
|
SettlementBatchSize int `help:"how many orders to batch per transaction" default:"250"`
|
|
|
|
FlushBatchSize int `help:"how many items in the rollups write cache before they are flushed to the database" devDefault:"20" releaseDefault:"10000"`
|
|
|
|
FlushInterval time.Duration `help:"how often to flush the rollups write cache to the database" devDefault:"30s" releaseDefault:"1m"`
|
|
|
|
ReportedRollupsReadBatchSize int `help:"how many records to read in a single transaction when calculating billable bandwidth" default:"1000"`
|
|
|
|
NodeStatusLogging bool `hidden:"true" help:"deprecated, log the offline/disqualification status of nodes" default:"false"`
|
2020-10-20 19:54:17 +01:00
|
|
|
WindowEndpointRolloutPhase WindowEndpointRolloutPhase `help:"rollout phase for the windowed endpoint" default:"phase3"`
|
2020-10-09 21:22:57 +01:00
|
|
|
OrdersSemaphoreSize int `help:"how many concurrent orders to process at once. zero is unlimited" default:"2"`
|
2019-06-21 11:38:40 +01:00
|
|
|
}
|
|
|
|
|
2020-06-25 15:47:44 +01:00
|
|
|
// BucketsDB returns information about buckets.
|
|
|
|
type BucketsDB interface {
|
|
|
|
// GetBucketID returns an existing bucket id.
|
2020-07-24 18:13:15 +01:00
|
|
|
GetBucketID(ctx context.Context, bucket metabase.BucketLocation) (id uuid.UUID, err error)
|
2020-06-25 15:47:44 +01:00
|
|
|
}
|
|
|
|
|
2019-03-28 20:09:23 +00:00
|
|
|
// Service for creating order limits.
|
2019-09-10 14:24:16 +01:00
|
|
|
//
|
|
|
|
// architecture: Service
|
2019-03-28 20:09:23 +00:00
|
|
|
type Service struct {
|
2020-08-27 15:30:04 +01:00
|
|
|
log *zap.Logger
|
|
|
|
satellite signing.Signer
|
|
|
|
overlay *overlay.Service
|
|
|
|
orders DB
|
|
|
|
buckets BucketsDB
|
|
|
|
|
2020-07-24 18:13:15 +01:00
|
|
|
includeEncryptedMetadata bool
|
|
|
|
encryptionKeys EncryptionKeys
|
|
|
|
|
2020-07-24 12:08:58 +01:00
|
|
|
satelliteAddress *pb.NodeAddress
|
|
|
|
orderExpiration time.Duration
|
2020-08-27 15:30:04 +01:00
|
|
|
|
|
|
|
rngMu sync.Mutex
|
|
|
|
rng *mathrand.Rand
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewService creates new service for creating order limits.
|
2019-07-11 23:44:47 +01:00
|
|
|
func NewService(
|
2019-08-06 17:35:59 +01:00
|
|
|
log *zap.Logger, satellite signing.Signer, overlay *overlay.Service,
|
2020-06-25 15:47:44 +01:00
|
|
|
orders DB, buckets BucketsDB,
|
2020-08-27 15:30:04 +01:00
|
|
|
config Config,
|
|
|
|
satelliteAddress *pb.NodeAddress,
|
2020-07-24 18:13:15 +01:00
|
|
|
) (*Service, error) {
|
|
|
|
if config.IncludeEncryptedMetadata && config.EncryptionKeys.Default.IsZero() {
|
|
|
|
return nil, Error.New("encryption keys must be specified to include encrypted metadata")
|
|
|
|
}
|
|
|
|
|
2019-03-28 20:09:23 +00:00
|
|
|
return &Service{
|
2020-08-27 15:30:04 +01:00
|
|
|
log: log,
|
|
|
|
satellite: satellite,
|
|
|
|
overlay: overlay,
|
|
|
|
orders: orders,
|
|
|
|
buckets: buckets,
|
|
|
|
|
2020-07-24 18:13:15 +01:00
|
|
|
includeEncryptedMetadata: config.IncludeEncryptedMetadata,
|
|
|
|
encryptionKeys: config.EncryptionKeys,
|
|
|
|
|
2020-07-24 12:08:58 +01:00
|
|
|
satelliteAddress: satelliteAddress,
|
2020-08-27 15:30:04 +01:00
|
|
|
orderExpiration: config.Expiration,
|
2020-07-24 12:08:58 +01:00
|
|
|
|
|
|
|
rng: mathrand.New(mathrand.NewSource(time.Now().UnixNano())),
|
2020-07-24 18:13:15 +01:00
|
|
|
}, nil
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// VerifyOrderLimitSignature verifies that the signature inside order limit belongs to the satellite.
|
2019-07-01 16:54:11 +01:00
|
|
|
func (service *Service) VerifyOrderLimitSignature(ctx context.Context, signed *pb.OrderLimit) (err error) {
|
2019-06-04 12:55:38 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2019-06-05 14:47:01 +01:00
|
|
|
return signing.VerifyOrderLimitSignature(ctx, service.satellite, signed)
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
2020-08-28 12:56:09 +01:00
|
|
|
func (service *Service) saveSerial(ctx context.Context, serialNumber storj.SerialNumber, bucket metabase.BucketLocation, expiresAt time.Time) (err error) {
|
2019-06-04 12:55:38 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2020-08-28 12:56:09 +01:00
|
|
|
return service.orders.CreateSerialInfo(ctx, serialNumber, []byte(bucket.Prefix()), expiresAt)
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
2020-08-28 12:56:09 +01:00
|
|
|
func (service *Service) updateBandwidth(ctx context.Context, bucket metabase.BucketLocation, addressedOrderLimits ...*pb.AddressedOrderLimit) (err error) {
|
2019-06-04 12:55:38 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2019-04-01 21:14:58 +01:00
|
|
|
if len(addressedOrderLimits) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
2019-06-10 15:58:28 +01:00
|
|
|
|
2019-04-01 21:14:58 +01:00
|
|
|
var action pb.PieceAction
|
2019-06-10 15:58:28 +01:00
|
|
|
|
|
|
|
var bucketAllocation int64
|
|
|
|
|
2019-04-01 21:14:58 +01:00
|
|
|
for _, addressedOrderLimit := range addressedOrderLimits {
|
2020-01-27 20:01:37 +00:00
|
|
|
if addressedOrderLimit != nil && addressedOrderLimit.Limit != nil {
|
2019-04-01 21:14:58 +01:00
|
|
|
orderLimit := addressedOrderLimit.Limit
|
|
|
|
action = orderLimit.Action
|
2019-06-10 15:58:28 +01:00
|
|
|
bucketAllocation += orderLimit.Limit
|
2019-04-01 21:14:58 +01:00
|
|
|
}
|
|
|
|
}
|
2019-06-10 15:58:28 +01:00
|
|
|
|
2019-04-09 20:12:58 +01:00
|
|
|
now := time.Now().UTC()
|
2019-04-04 16:20:59 +01:00
|
|
|
intervalStart := time.Date(now.Year(), now.Month(), now.Day(), now.Hour(), 0, 0, 0, now.Location())
|
2019-04-01 21:14:58 +01:00
|
|
|
|
2019-06-12 16:00:29 +01:00
|
|
|
// TODO: all of this below should be a single db transaction. in fact, this whole function should probably be part of an existing transaction
|
2020-08-28 12:56:09 +01:00
|
|
|
if err := service.orders.UpdateBucketBandwidthAllocation(ctx, bucket.ProjectID, []byte(bucket.BucketName), action, bucketAllocation, intervalStart); err != nil {
|
2019-06-10 15:58:28 +01:00
|
|
|
return Error.Wrap(err)
|
2019-04-01 21:14:58 +01:00
|
|
|
}
|
2019-06-10 15:58:28 +01:00
|
|
|
|
2019-04-01 21:14:58 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-03-28 20:09:23 +00:00
|
|
|
// CreateGetOrderLimits creates the order limits for downloading the pieces of pointer.
|
2020-08-28 12:56:09 +01:00
|
|
|
func (service *Service) CreateGetOrderLimits(ctx context.Context, bucket metabase.BucketLocation, pointer *pb.Pointer) (_ []*pb.AddressedOrderLimit, privateKey storj.PiecePrivateKey, err error) {
|
2019-06-04 12:55:38 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2019-07-11 21:51:40 +01:00
|
|
|
|
2019-03-28 20:09:23 +00:00
|
|
|
redundancy, err := eestream.NewRedundancyStrategyFromProto(pointer.GetRemote().GetRedundancy())
|
|
|
|
if err != nil {
|
2019-07-11 21:51:40 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
pieceSize := eestream.CalcPieceSize(pointer.GetSegmentSize(), redundancy)
|
|
|
|
|
2020-03-13 18:01:48 +00:00
|
|
|
nodeIDs := make([]storj.NodeID, len(pointer.GetRemote().GetRemotePieces()))
|
|
|
|
for i, piece := range pointer.GetRemote().GetRemotePieces() {
|
|
|
|
nodeIDs[i] = piece.NodeId
|
|
|
|
}
|
|
|
|
|
2020-03-30 14:32:02 +01:00
|
|
|
nodes, err := service.overlay.GetOnlineNodesForGetDelete(ctx, nodeIDs)
|
2020-03-13 18:01:48 +00:00
|
|
|
if err != nil {
|
|
|
|
service.log.Debug("error getting nodes from overlay", zap.Error(err))
|
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
|
|
|
}
|
|
|
|
|
2020-07-24 18:13:15 +01:00
|
|
|
signer, err := NewSignerGet(service, pointer.GetRemote().RootPieceId, time.Now(), pieceSize, bucket)
|
2020-08-14 15:36:30 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
neededLimits := pb.NewRedundancySchemeToStorj(pointer.GetRemote().GetRedundancy()).DownloadNodes()
|
|
|
|
|
|
|
|
pieces := pointer.GetRemote().GetRemotePieces()
|
|
|
|
for _, pieceIndex := range service.perm(len(pieces)) {
|
|
|
|
piece := pieces[pieceIndex]
|
2020-03-13 18:01:48 +00:00
|
|
|
node, ok := nodes[piece.NodeId]
|
|
|
|
if !ok {
|
2019-03-29 08:53:43 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-08-14 15:36:30 +01:00
|
|
|
address := node.Address.Address
|
|
|
|
if node.LastIPPort != "" {
|
|
|
|
address = node.LastIPPort
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
2020-08-14 15:36:30 +01:00
|
|
|
_, err := signer.Sign(ctx, storj.NodeURL{
|
|
|
|
ID: piece.NodeId,
|
|
|
|
Address: address,
|
|
|
|
}, piece.PieceNum)
|
|
|
|
if err != nil {
|
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2020-03-06 22:04:23 +00:00
|
|
|
}
|
2019-03-28 20:09:23 +00:00
|
|
|
|
2020-08-14 15:36:30 +01:00
|
|
|
if len(signer.AddressedLimits) >= int(neededLimits) {
|
|
|
|
break
|
|
|
|
}
|
2019-03-29 09:53:53 +00:00
|
|
|
}
|
2020-08-14 15:36:30 +01:00
|
|
|
if len(signer.AddressedLimits) < redundancy.RequiredCount() {
|
2020-10-13 13:13:41 +01:00
|
|
|
mon.Meter("download_failed_not_enough_pieces_uplink").Mark(1) //mon:locked
|
2020-08-14 15:36:30 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, ErrDownloadFailedNotEnoughPieces.New("not enough orderlimits: got %d, required %d", len(signer.AddressedLimits), redundancy.RequiredCount())
|
2019-03-29 09:53:53 +00:00
|
|
|
}
|
|
|
|
|
2020-08-28 12:56:09 +01:00
|
|
|
err = service.saveSerial(ctx, signer.Serial, bucket, signer.OrderExpiration)
|
2020-01-27 20:01:37 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
|
|
|
}
|
|
|
|
|
2020-08-28 12:56:09 +01:00
|
|
|
if err := service.updateBandwidth(ctx, bucket, signer.AddressedLimits...); err != nil {
|
2019-07-11 21:51:40 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-04-01 21:14:58 +01:00
|
|
|
}
|
|
|
|
|
2020-08-14 15:36:30 +01:00
|
|
|
return signer.AddressedLimits, signer.PrivateKey, nil
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
2020-08-14 15:36:30 +01:00
|
|
|
func (service *Service) perm(n int) []int {
|
2020-01-27 20:01:37 +00:00
|
|
|
service.rngMu.Lock()
|
2020-08-14 15:36:30 +01:00
|
|
|
defer service.rngMu.Unlock()
|
|
|
|
return service.rng.Perm(n)
|
2020-01-27 20:01:37 +00:00
|
|
|
}
|
|
|
|
|
2019-03-28 20:09:23 +00:00
|
|
|
// CreatePutOrderLimits creates the order limits for uploading pieces to nodes.
|
2020-08-28 12:56:09 +01:00
|
|
|
func (service *Service) CreatePutOrderLimits(ctx context.Context, bucket metabase.BucketLocation, nodes []*overlay.SelectedNode, pieceExpiration time.Time, maxPieceSize int64) (_ storj.PieceID, _ []*pb.AddressedOrderLimit, privateKey storj.PiecePrivateKey, err error) {
|
2019-06-04 12:55:38 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2019-07-09 22:54:00 +01:00
|
|
|
|
2020-07-24 18:13:15 +01:00
|
|
|
signer, err := NewSignerPut(service, pieceExpiration, time.Now(), maxPieceSize, bucket)
|
2019-03-28 20:09:23 +00:00
|
|
|
if err != nil {
|
2019-07-11 21:51:40 +01:00
|
|
|
return storj.PieceID{}, nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
for pieceNum, node := range nodes {
|
|
|
|
address := node.Address.Address
|
2020-03-06 22:04:23 +00:00
|
|
|
if node.LastIPPort != "" {
|
2020-07-24 19:57:11 +01:00
|
|
|
address = node.LastIPPort
|
2020-03-06 22:04:23 +00:00
|
|
|
}
|
2020-07-24 19:57:11 +01:00
|
|
|
_, err := signer.Sign(ctx, storj.NodeURL{ID: node.ID, Address: address}, int32(pieceNum))
|
|
|
|
if err != nil {
|
|
|
|
return storj.PieceID{}, nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-28 12:56:09 +01:00
|
|
|
err = service.saveSerial(ctx, signer.Serial, bucket, signer.OrderExpiration)
|
2019-03-29 09:53:53 +00:00
|
|
|
if err != nil {
|
2019-07-11 21:51:40 +01:00
|
|
|
return storj.PieceID{}, nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-03-29 09:53:53 +00:00
|
|
|
}
|
|
|
|
|
2020-08-28 12:56:09 +01:00
|
|
|
if err := service.updateBandwidth(ctx, bucket, signer.AddressedLimits...); err != nil {
|
2019-07-11 21:51:40 +01:00
|
|
|
return storj.PieceID{}, nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-04-01 21:14:58 +01:00
|
|
|
}
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
return signer.RootPieceID, signer.AddressedLimits, signer.PrivateKey, nil
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// CreateDeleteOrderLimits creates the order limits for deleting the pieces of pointer.
|
2020-08-28 12:56:09 +01:00
|
|
|
func (service *Service) CreateDeleteOrderLimits(ctx context.Context, bucket metabase.BucketLocation, pointer *pb.Pointer) (_ []*pb.AddressedOrderLimit, _ storj.PiecePrivateKey, err error) {
|
2019-06-04 12:55:38 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2019-07-11 21:51:40 +01:00
|
|
|
|
2020-03-13 18:01:48 +00:00
|
|
|
nodeIDs := make([]storj.NodeID, len(pointer.GetRemote().GetRemotePieces()))
|
|
|
|
for i, piece := range pointer.GetRemote().GetRemotePieces() {
|
|
|
|
nodeIDs[i] = piece.NodeId
|
|
|
|
}
|
|
|
|
|
2020-03-30 14:32:02 +01:00
|
|
|
nodes, err := service.overlay.GetOnlineNodesForGetDelete(ctx, nodeIDs)
|
2020-03-13 18:01:48 +00:00
|
|
|
if err != nil {
|
|
|
|
service.log.Debug("error getting nodes from overlay", zap.Error(err))
|
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
|
|
|
}
|
|
|
|
|
2020-07-24 18:13:15 +01:00
|
|
|
signer, err := NewSignerDelete(service, pointer.GetRemote().RootPieceId, time.Now(), bucket)
|
2020-07-24 19:57:11 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
|
|
|
}
|
|
|
|
|
2020-03-30 14:32:02 +01:00
|
|
|
var nodeErrors errs.Group
|
2019-03-28 20:09:23 +00:00
|
|
|
for _, piece := range pointer.GetRemote().GetRemotePieces() {
|
2020-03-13 18:01:48 +00:00
|
|
|
node, ok := nodes[piece.NodeId]
|
|
|
|
if !ok {
|
2020-03-30 14:32:02 +01:00
|
|
|
nodeErrors.Add(errs.New("node %q is not reliable", piece.NodeId))
|
2019-03-29 08:53:43 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
address := node.Address.Address
|
|
|
|
if node.LastIPPort != "" {
|
|
|
|
address = node.LastIPPort
|
|
|
|
}
|
|
|
|
_, err := signer.Sign(ctx, storj.NodeURL{ID: piece.NodeId, Address: address}, piece.PieceNum)
|
2019-03-28 20:09:23 +00:00
|
|
|
if err != nil {
|
2019-07-11 21:51:40 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
if len(signer.AddressedLimits) == 0 {
|
|
|
|
return nil, storj.PiecePrivateKey{}, Error.New("failed creating order limits: %w", nodeErrors.Err())
|
2019-03-29 09:53:53 +00:00
|
|
|
}
|
|
|
|
|
2020-08-28 12:56:09 +01:00
|
|
|
err = service.saveSerial(ctx, signer.Serial, bucket, signer.OrderExpiration)
|
2019-03-29 09:53:53 +00:00
|
|
|
if err != nil {
|
2019-07-11 21:51:40 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-03-29 09:53:53 +00:00
|
|
|
}
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
return signer.AddressedLimits, signer.PrivateKey, nil
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// CreateAuditOrderLimits creates the order limits for auditing the pieces of pointer.
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
func (service *Service) CreateAuditOrderLimits(ctx context.Context, bucket metabase.BucketLocation, pointer *pb.Pointer, skip map[storj.NodeID]bool) (_ []*pb.AddressedOrderLimit, _ storj.PiecePrivateKey, cachedIPsAndPorts map[storj.NodeID]string, err error) {
|
2019-06-04 12:55:38 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2020-07-24 19:57:11 +01:00
|
|
|
|
2019-03-28 20:09:23 +00:00
|
|
|
redundancy := pointer.GetRemote().GetRedundancy()
|
|
|
|
shareSize := redundancy.GetErasureShareSize()
|
|
|
|
totalPieces := redundancy.GetTotal()
|
2019-07-11 21:51:40 +01:00
|
|
|
|
2020-03-13 18:01:48 +00:00
|
|
|
nodeIDs := make([]storj.NodeID, len(pointer.GetRemote().GetRemotePieces()))
|
|
|
|
for i, piece := range pointer.GetRemote().GetRemotePieces() {
|
|
|
|
nodeIDs[i] = piece.NodeId
|
|
|
|
}
|
|
|
|
|
2020-03-30 14:32:02 +01:00
|
|
|
nodes, err := service.overlay.GetOnlineNodesForGetDelete(ctx, nodeIDs)
|
2020-03-13 18:01:48 +00:00
|
|
|
if err != nil {
|
|
|
|
service.log.Debug("error getting nodes from overlay", zap.Error(err))
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, nil, Error.Wrap(err)
|
2020-03-13 18:01:48 +00:00
|
|
|
}
|
|
|
|
|
2020-07-24 18:13:15 +01:00
|
|
|
signer, err := NewSignerAudit(service, pointer.GetRemote().RootPieceId, time.Now(), int64(shareSize), bucket)
|
2020-07-24 19:57:11 +01:00
|
|
|
if err != nil {
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, nil, Error.Wrap(err)
|
2020-07-24 19:57:11 +01:00
|
|
|
}
|
|
|
|
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
cachedIPsAndPorts = make(map[storj.NodeID]string)
|
2020-03-30 14:32:02 +01:00
|
|
|
var nodeErrors errs.Group
|
2019-03-28 20:09:23 +00:00
|
|
|
var limitsCount int32
|
|
|
|
limits := make([]*pb.AddressedOrderLimit, totalPieces)
|
|
|
|
for _, piece := range pointer.GetRemote().GetRemotePieces() {
|
2019-05-27 12:13:47 +01:00
|
|
|
if skip[piece.NodeId] {
|
|
|
|
continue
|
|
|
|
}
|
2020-03-13 18:01:48 +00:00
|
|
|
node, ok := nodes[piece.NodeId]
|
|
|
|
if !ok {
|
2020-03-30 14:32:02 +01:00
|
|
|
nodeErrors.Add(errs.New("node %q is not reliable", piece.NodeId))
|
2019-03-29 08:53:43 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
address := node.Address.Address
|
|
|
|
if node.LastIPPort != "" {
|
|
|
|
cachedIPsAndPorts[piece.NodeId] = node.LastIPPort
|
|
|
|
}
|
2020-07-24 19:57:11 +01:00
|
|
|
limit, err := signer.Sign(ctx, storj.NodeURL{
|
|
|
|
ID: piece.NodeId,
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
Address: address,
|
2020-07-24 19:57:11 +01:00
|
|
|
}, piece.PieceNum)
|
2019-03-28 20:09:23 +00:00
|
|
|
if err != nil {
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, nil, Error.Wrap(err)
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
limits[piece.GetPieceNum()] = limit
|
2019-03-28 20:09:23 +00:00
|
|
|
limitsCount++
|
|
|
|
}
|
2019-03-29 08:53:43 +00:00
|
|
|
|
2019-03-28 20:09:23 +00:00
|
|
|
if limitsCount < redundancy.GetMinReq() {
|
|
|
|
err = Error.New("not enough nodes available: got %d, required %d", limitsCount, redundancy.GetMinReq())
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, nil, errs.Combine(err, nodeErrors.Err())
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
2020-08-28 12:56:09 +01:00
|
|
|
err = service.saveSerial(ctx, signer.Serial, bucket, signer.OrderExpiration)
|
2019-03-29 09:53:53 +00:00
|
|
|
if err != nil {
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, nil, Error.Wrap(err)
|
2019-03-29 09:53:53 +00:00
|
|
|
}
|
2020-08-28 12:56:09 +01:00
|
|
|
if err := service.updateBandwidth(ctx, bucket, limits...); err != nil {
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, nil, Error.Wrap(err)
|
2019-04-01 21:14:58 +01:00
|
|
|
}
|
|
|
|
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
return limits, signer.PrivateKey, cachedIPsAndPorts, nil
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
2019-05-27 12:13:47 +01:00
|
|
|
// CreateAuditOrderLimit creates an order limit for auditing a single piece from a pointer.
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
func (service *Service) CreateAuditOrderLimit(ctx context.Context, bucket metabase.BucketLocation, nodeID storj.NodeID, pieceNum int32, rootPieceID storj.PieceID, shareSize int32) (limit *pb.AddressedOrderLimit, _ storj.PiecePrivateKey, cachedIPAndPort string, err error) {
|
2019-07-03 17:53:15 +01:00
|
|
|
// TODO reduce number of params ?
|
2019-06-04 12:55:38 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2019-07-09 22:54:00 +01:00
|
|
|
|
2019-08-06 17:35:59 +01:00
|
|
|
node, err := service.overlay.Get(ctx, nodeID)
|
2019-05-27 12:13:47 +01:00
|
|
|
if err != nil {
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, "", Error.Wrap(err)
|
2019-05-27 12:13:47 +01:00
|
|
|
}
|
2019-06-24 15:46:10 +01:00
|
|
|
if node.Disqualified != nil {
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, "", overlay.ErrNodeDisqualified.New("%v", nodeID)
|
2019-06-24 15:46:10 +01:00
|
|
|
}
|
2020-08-13 13:00:56 +01:00
|
|
|
if node.ExitStatus.ExitFinishedAt != nil {
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, "", overlay.ErrNodeFinishedGE.New("%v", nodeID)
|
2020-08-13 13:00:56 +01:00
|
|
|
}
|
2019-08-06 17:35:59 +01:00
|
|
|
if !service.overlay.IsOnline(node) {
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, "", overlay.ErrNodeOffline.New("%v", nodeID)
|
2019-05-27 12:13:47 +01:00
|
|
|
}
|
|
|
|
|
2020-07-24 18:13:15 +01:00
|
|
|
signer, err := NewSignerAudit(service, rootPieceID, time.Now(), int64(shareSize), bucket)
|
2019-05-27 12:13:47 +01:00
|
|
|
if err != nil {
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, "", Error.Wrap(err)
|
2019-05-27 12:13:47 +01:00
|
|
|
}
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
orderLimit, err := signer.Sign(ctx, storj.NodeURL{
|
|
|
|
ID: nodeID,
|
|
|
|
Address: node.Address.Address,
|
|
|
|
}, pieceNum)
|
|
|
|
if err != nil {
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, "", Error.Wrap(err)
|
2019-05-27 12:13:47 +01:00
|
|
|
}
|
|
|
|
|
2020-08-28 12:56:09 +01:00
|
|
|
err = service.saveSerial(ctx, signer.Serial, bucket, signer.OrderExpiration)
|
2019-05-27 12:13:47 +01:00
|
|
|
if err != nil {
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, "", Error.Wrap(err)
|
2019-05-27 12:13:47 +01:00
|
|
|
}
|
2020-08-28 12:56:09 +01:00
|
|
|
if err := service.updateBandwidth(ctx, bucket, limit); err != nil {
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, "", Error.Wrap(err)
|
2019-05-27 12:13:47 +01:00
|
|
|
}
|
|
|
|
|
satellite/audit: use LastIPAndPort preferentially
This preserves the last_ip_and_port field from node lookups through
CreateAuditOrderLimits() and CreateAuditOrderLimit(), so that later
calls to (*Verifier).GetShare() can try to use that IP and port. If a
connection to the given IP and port cannot be made, or the connection
cannot be verified and secured with the target node identity, an
attempt is made to connect to the original node address instead.
A similar change is not necessary to the other Create*OrderLimits
functions, because they already replace node addresses with the cached
IP and port as appropriate. We might want to consider making a similar
change to CreateGetRepairOrderLimits(), though.
The audit situation is unique because the ramifications are especially
powerful when we get the address wrong. Failing a single audit can have
a heavy cost to a storage node. We need to make extra effort in order
to avoid imposing that cost unfairly.
Situation 1: If an audit fails because the repair worker failed to make
a DNS query (which might well be the fault on the satellite side), and
we have last_ip_and_port information available for the target node, it
would be unfair not to try connecting to that last_ip_and_port address.
Situation 2: If a node has changed addresses recently and the operator
correctly changed its DNS entry, but we don't bother querying DNS, it
would be unfair to penalize the node for our failure to connect to it.
So the audit worker must try both last_ip_and_port _and_ the node
address as supplied by the SNO.
We elect here to try last_ip_and_port first, on the grounds that (a) it
is expected to work in the large majority of cases, and (b) there
should not be any security concerns with connecting to an out-of-date
address, and (c) avoiding DNS queries on the satellite side helps
alleviate satellite operational load.
Change-Id: I9bf6c6c79866d879adecac6144a6c346f4f61200
2020-09-30 05:53:43 +01:00
|
|
|
return orderLimit, signer.PrivateKey, node.LastIPPort, nil
|
2019-05-27 12:13:47 +01:00
|
|
|
}
|
|
|
|
|
2019-07-11 23:44:47 +01:00
|
|
|
// CreateGetRepairOrderLimits creates the order limits for downloading the
|
|
|
|
// healthy pieces of pointer as the source for repair.
|
|
|
|
//
|
|
|
|
// The length of the returned orders slice is the total number of pieces of the
|
|
|
|
// segment, setting to null the ones which don't correspond to a healthy piece.
|
2019-03-28 20:09:23 +00:00
|
|
|
// CreateGetRepairOrderLimits creates the order limits for downloading the healthy pieces of pointer as the source for repair.
|
2020-08-28 12:56:09 +01:00
|
|
|
func (service *Service) CreateGetRepairOrderLimits(ctx context.Context, bucket metabase.BucketLocation, pointer *pb.Pointer, healthy []*pb.RemotePiece) (_ []*pb.AddressedOrderLimit, _ storj.PiecePrivateKey, err error) {
|
2019-06-04 12:55:38 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2019-07-11 21:51:40 +01:00
|
|
|
|
2019-04-03 14:17:32 +01:00
|
|
|
redundancy, err := eestream.NewRedundancyStrategyFromProto(pointer.GetRemote().GetRedundancy())
|
|
|
|
if err != nil {
|
2019-07-11 21:51:40 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-04-03 14:17:32 +01:00
|
|
|
}
|
2019-07-11 21:51:40 +01:00
|
|
|
|
2019-04-03 14:17:32 +01:00
|
|
|
pieceSize := eestream.CalcPieceSize(pointer.GetSegmentSize(), redundancy)
|
|
|
|
totalPieces := redundancy.TotalCount()
|
2019-03-28 20:09:23 +00:00
|
|
|
|
2020-03-13 18:01:48 +00:00
|
|
|
nodeIDs := make([]storj.NodeID, len(pointer.GetRemote().GetRemotePieces()))
|
|
|
|
for i, piece := range pointer.GetRemote().GetRemotePieces() {
|
|
|
|
nodeIDs[i] = piece.NodeId
|
|
|
|
}
|
|
|
|
|
2020-03-30 14:32:02 +01:00
|
|
|
nodes, err := service.overlay.GetOnlineNodesForGetDelete(ctx, nodeIDs)
|
2020-03-13 18:01:48 +00:00
|
|
|
if err != nil {
|
|
|
|
service.log.Debug("error getting nodes from overlay", zap.Error(err))
|
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
|
|
|
}
|
|
|
|
|
2020-07-24 18:13:15 +01:00
|
|
|
signer, err := NewSignerRepairGet(service, pointer.GetRemote().RootPieceId, time.Now(), pieceSize, bucket)
|
2020-07-24 19:57:11 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
|
|
|
}
|
|
|
|
|
2020-03-30 14:32:02 +01:00
|
|
|
var nodeErrors errs.Group
|
2019-04-03 14:17:32 +01:00
|
|
|
var limitsCount int
|
2019-03-28 20:09:23 +00:00
|
|
|
limits := make([]*pb.AddressedOrderLimit, totalPieces)
|
|
|
|
for _, piece := range healthy {
|
2020-03-13 18:01:48 +00:00
|
|
|
node, ok := nodes[piece.NodeId]
|
|
|
|
if !ok {
|
2020-03-30 14:32:02 +01:00
|
|
|
nodeErrors.Add(errs.New("node %q is not reliable", piece.NodeId))
|
2019-03-29 08:53:43 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
limit, err := signer.Sign(ctx, storj.NodeURL{
|
|
|
|
ID: piece.NodeId,
|
|
|
|
Address: node.Address.Address,
|
|
|
|
}, piece.PieceNum)
|
2019-03-28 20:09:23 +00:00
|
|
|
if err != nil {
|
2019-07-11 21:51:40 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
limits[piece.GetPieceNum()] = limit
|
2019-04-03 14:17:32 +01:00
|
|
|
limitsCount++
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
2019-04-03 14:17:32 +01:00
|
|
|
if limitsCount < redundancy.RequiredCount() {
|
|
|
|
err = Error.New("not enough nodes available: got %d, required %d", limitsCount, redundancy.RequiredCount())
|
2020-03-30 14:32:02 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, errs.Combine(err, nodeErrors.Err())
|
2019-03-29 08:53:43 +00:00
|
|
|
}
|
|
|
|
|
2020-08-28 12:56:09 +01:00
|
|
|
err = service.saveSerial(ctx, signer.Serial, bucket, signer.OrderExpiration)
|
2019-03-29 09:53:53 +00:00
|
|
|
if err != nil {
|
2019-07-11 21:51:40 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-03-29 09:53:53 +00:00
|
|
|
}
|
2020-08-28 12:56:09 +01:00
|
|
|
if err := service.updateBandwidth(ctx, bucket, limits...); err != nil {
|
2019-07-11 21:51:40 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-04-01 21:14:58 +01:00
|
|
|
}
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
return limits, signer.PrivateKey, nil
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// CreatePutRepairOrderLimits creates the order limits for uploading the repaired pieces of pointer to newNodes.
|
2020-08-28 12:56:09 +01:00
|
|
|
func (service *Service) CreatePutRepairOrderLimits(ctx context.Context, bucket metabase.BucketLocation, pointer *pb.Pointer, getOrderLimits []*pb.AddressedOrderLimit, newNodes []*overlay.SelectedNode, optimalThresholdMultiplier float64) (_ []*pb.AddressedOrderLimit, _ storj.PiecePrivateKey, err error) {
|
2019-06-04 12:55:38 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2019-03-28 20:09:23 +00:00
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
// Create the order limits for being used to upload the repaired pieces
|
|
|
|
redundancy, err := eestream.NewRedundancyStrategyFromProto(pointer.GetRemote().GetRedundancy())
|
2019-07-11 21:51:40 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
|
|
|
}
|
2020-07-24 19:57:11 +01:00
|
|
|
pieceSize := eestream.CalcPieceSize(pointer.GetSegmentSize(), redundancy)
|
|
|
|
|
|
|
|
totalPieces := redundancy.TotalCount()
|
|
|
|
totalPiecesAfterRepair := int(math.Ceil(float64(redundancy.OptimalThreshold()) * optimalThresholdMultiplier))
|
|
|
|
if totalPiecesAfterRepair > totalPieces {
|
|
|
|
totalPiecesAfterRepair = totalPieces
|
|
|
|
}
|
|
|
|
|
|
|
|
var numCurrentPieces int
|
|
|
|
for _, o := range getOrderLimits {
|
|
|
|
if o != nil {
|
|
|
|
numCurrentPieces++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
totalPiecesToRepair := totalPiecesAfterRepair - numCurrentPieces
|
2019-07-11 21:51:40 +01:00
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
limits := make([]*pb.AddressedOrderLimit, totalPieces)
|
2020-07-24 18:13:15 +01:00
|
|
|
signer, err := NewSignerRepairPut(service, pointer.GetRemote().RootPieceId, pointer.ExpirationDate, time.Now(), pieceSize, bucket)
|
2019-03-28 20:09:23 +00:00
|
|
|
if err != nil {
|
2019-07-11 21:51:40 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
var pieceNum int32
|
|
|
|
for _, node := range newNodes {
|
|
|
|
for int(pieceNum) < totalPieces && getOrderLimits[pieceNum] != nil {
|
|
|
|
pieceNum++
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
if int(pieceNum) >= totalPieces { // should not happen
|
|
|
|
return nil, storj.PiecePrivateKey{}, Error.New("piece num greater than total pieces: %d >= %d", pieceNum, totalPieces)
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
limit, err := signer.Sign(ctx, storj.NodeURL{
|
|
|
|
ID: node.ID,
|
|
|
|
Address: node.Address.Address,
|
|
|
|
}, pieceNum)
|
|
|
|
if err != nil {
|
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
limits[pieceNum] = limit
|
|
|
|
pieceNum++
|
|
|
|
totalPiecesToRepair--
|
2019-07-11 23:44:47 +01:00
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
if totalPiecesToRepair == 0 {
|
|
|
|
break
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-28 12:56:09 +01:00
|
|
|
err = service.saveSerial(ctx, signer.Serial, bucket, signer.OrderExpiration)
|
2019-03-29 09:53:53 +00:00
|
|
|
if err != nil {
|
2019-07-11 21:51:40 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-03-29 09:53:53 +00:00
|
|
|
}
|
2020-08-28 12:56:09 +01:00
|
|
|
if err := service.updateBandwidth(ctx, bucket, limits...); err != nil {
|
2019-07-11 21:51:40 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-04-01 21:14:58 +01:00
|
|
|
}
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
return limits, signer.PrivateKey, nil
|
2019-03-28 20:09:23 +00:00
|
|
|
}
|
2019-04-05 08:42:56 +01:00
|
|
|
|
2019-10-11 22:18:05 +01:00
|
|
|
// CreateGracefulExitPutOrderLimit creates an order limit for graceful exit put transfers.
|
2020-08-28 12:56:09 +01:00
|
|
|
func (service *Service) CreateGracefulExitPutOrderLimit(ctx context.Context, bucket metabase.BucketLocation, nodeID storj.NodeID, pieceNum int32, rootPieceID storj.PieceID, shareSize int32) (limit *pb.AddressedOrderLimit, _ storj.PiecePrivateKey, err error) {
|
2019-10-11 22:18:05 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
// should this use KnownReliable or similar?
|
2019-10-11 22:18:05 +01:00
|
|
|
node, err := service.overlay.Get(ctx, nodeID)
|
|
|
|
if err != nil {
|
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
|
|
|
}
|
|
|
|
if node.Disqualified != nil {
|
|
|
|
return nil, storj.PiecePrivateKey{}, overlay.ErrNodeDisqualified.New("%v", nodeID)
|
|
|
|
}
|
|
|
|
if !service.overlay.IsOnline(node) {
|
|
|
|
return nil, storj.PiecePrivateKey{}, overlay.ErrNodeOffline.New("%v", nodeID)
|
|
|
|
}
|
|
|
|
|
2020-07-24 18:13:15 +01:00
|
|
|
signer, err := NewSignerGracefulExit(service, rootPieceID, time.Now(), shareSize, bucket)
|
2019-10-11 22:18:05 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
|
|
|
}
|
|
|
|
|
2020-10-28 03:15:09 +00:00
|
|
|
address := node.Address.Address
|
|
|
|
if node.LastIPPort != "" {
|
|
|
|
address = node.LastIPPort
|
|
|
|
}
|
|
|
|
nodeURL := storj.NodeURL{ID: nodeID, Address: address}
|
2020-07-24 19:57:11 +01:00
|
|
|
limit, err = signer.Sign(ctx, nodeURL, pieceNum)
|
|
|
|
if err != nil {
|
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
2019-10-11 22:18:05 +01:00
|
|
|
}
|
|
|
|
|
2020-08-28 12:56:09 +01:00
|
|
|
err = service.saveSerial(ctx, signer.Serial, bucket, signer.OrderExpiration)
|
2019-10-11 22:18:05 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
|
|
|
}
|
2020-08-28 12:56:09 +01:00
|
|
|
if err := service.updateBandwidth(ctx, bucket, limit); err != nil {
|
2019-10-11 22:18:05 +01:00
|
|
|
return nil, storj.PiecePrivateKey{}, Error.Wrap(err)
|
|
|
|
}
|
|
|
|
|
2020-07-24 19:57:11 +01:00
|
|
|
return limit, signer.PrivateKey, nil
|
2019-10-11 22:18:05 +01:00
|
|
|
}
|
|
|
|
|
2020-07-16 15:18:02 +01:00
|
|
|
// UpdateGetInlineOrder updates amount of inline GET bandwidth for given bucket.
|
2020-08-28 12:56:09 +01:00
|
|
|
func (service *Service) UpdateGetInlineOrder(ctx context.Context, bucket metabase.BucketLocation, amount int64) (err error) {
|
2019-06-04 12:55:38 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2019-04-09 20:12:58 +01:00
|
|
|
now := time.Now().UTC()
|
2019-04-05 08:42:56 +01:00
|
|
|
intervalStart := time.Date(now.Year(), now.Month(), now.Day(), now.Hour(), 0, 0, 0, now.Location())
|
|
|
|
|
2020-08-28 12:56:09 +01:00
|
|
|
return service.orders.UpdateBucketBandwidthInline(ctx, bucket.ProjectID, []byte(bucket.BucketName), pb.PieceAction_GET, amount, intervalStart)
|
2019-04-05 08:42:56 +01:00
|
|
|
}
|
|
|
|
|
2020-07-16 15:18:02 +01:00
|
|
|
// UpdatePutInlineOrder updates amount of inline PUT bandwidth for given bucket.
|
2020-08-28 12:56:09 +01:00
|
|
|
func (service *Service) UpdatePutInlineOrder(ctx context.Context, bucket metabase.BucketLocation, amount int64) (err error) {
|
2019-06-04 12:55:38 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2019-04-09 20:12:58 +01:00
|
|
|
now := time.Now().UTC()
|
2019-04-05 08:42:56 +01:00
|
|
|
intervalStart := time.Date(now.Year(), now.Month(), now.Day(), now.Hour(), 0, 0, 0, now.Location())
|
|
|
|
|
2020-08-28 12:56:09 +01:00
|
|
|
return service.orders.UpdateBucketBandwidthInline(ctx, bucket.ProjectID, []byte(bucket.BucketName), pb.PieceAction_PUT, amount, intervalStart)
|
2019-04-05 08:42:56 +01:00
|
|
|
}
|
2020-11-18 21:39:13 +00:00
|
|
|
|
|
|
|
// DecryptOrderMetadata decrypts the order metadata.
|
|
|
|
func (service *Service) DecryptOrderMetadata(ctx context.Context, order *pb.OrderLimit) (_ *pb.OrderLimitMetadata, err error) {
|
|
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
|
|
|
|
var orderKeyID EncryptionKeyID
|
|
|
|
copy(orderKeyID[:], order.EncryptedMetadataKeyId)
|
|
|
|
|
|
|
|
var key = service.encryptionKeys.Default
|
|
|
|
if key.ID != orderKeyID {
|
|
|
|
val, ok := service.encryptionKeys.KeyByID[orderKeyID]
|
|
|
|
if !ok {
|
|
|
|
return nil, ErrDecryptOrderMetadata.New("no encryption key found that matches the order.EncryptedMetadataKeyId")
|
|
|
|
}
|
|
|
|
key = EncryptionKey{
|
|
|
|
ID: orderKeyID,
|
|
|
|
Key: val,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return key.DecryptMetadata(order.SerialNumber, order.EncryptedMetadata)
|
|
|
|
}
|