// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.

package gracefulexit_test

import (
	"context"
	"io"
	"strconv"
	"testing"
	"time"

	"github.com/stretchr/testify/require"
	"github.com/zeebo/errs"
	"go.uber.org/zap"
	"go.uber.org/zap/zaptest"
	"golang.org/x/sync/errgroup"

	"storj.io/common/errs2"
	"storj.io/common/identity"
	"storj.io/common/memory"
	"storj.io/common/pb"
	"storj.io/common/rpc/rpcstatus"
	"storj.io/common/signing"
	"storj.io/common/storj"
	"storj.io/common/sync2"
	"storj.io/common/testcontext"
	"storj.io/common/testrand"
	"storj.io/storj/private/testblobs"
	"storj.io/storj/private/testplanet"
	"storj.io/storj/satellite"
	"storj.io/storj/satellite/metabase"
	"storj.io/storj/satellite/metainfo"
	"storj.io/storj/satellite/overlay"
	"storj.io/storj/storagenode"
	"storj.io/storj/storagenode/gracefulexit"
)

const numObjects = 6
const numMultipartObjects = 6

// exitProcessClient is used so we can pass the graceful exit process clients regardless of implementation.
type exitProcessClient interface {
	Send(*pb.StorageNodeMessage) error
	Recv() (*pb.SatelliteMessage, error)
}

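// The tests below drive the satellite's graceful exit Process stream directly:
// the satellite sends TransferPiece messages for pieces held by the exiting
// node, the node replies with Succeeded (carrying the original order limit,
// the original piece hash, and a replacement hash signed by the receiving
// node) or Failed, and the satellite answers with DeletePiece and eventually
// ExitCompleted or ExitFailed.

// TestSuccess transfers every piece (failing the first offered piece once) and
// expects the exit to complete with all pieces transferred and none counted as failed.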
func TestSuccess(t *testing.T) {
	testTransfers(t, numObjects, numMultipartObjects, func(t *testing.T, ctx *testcontext.Context, nodeFullIDs map[storj.NodeID]*identity.FullIdentity, satellite *testplanet.Satellite, processClient exitProcessClient, exitingNode *storagenode.Peer, numPieces int) {
		var pieceID storj.PieceID
		failedCount := 0
		deletedCount := 0
		for {
			response, err := processClient.Recv()
			if errs.Is(err, io.EOF) {
				// Done
				break
			}
			require.NoError(t, err)

			switch m := response.GetMessage().(type) {
			case *pb.SatelliteMessage_TransferPiece:
				require.NotNil(t, m)

				// pick the first one to fail
				if pieceID.IsZero() {
					pieceID = m.TransferPiece.OriginalPieceId
				}

				if failedCount > 0 || pieceID != m.TransferPiece.OriginalPieceId {
					pieceReader, err := exitingNode.Storage2.Store.Reader(ctx, satellite.ID(), m.TransferPiece.OriginalPieceId)
					require.NoError(t, err)

					header, err := pieceReader.GetPieceHeader()
					require.NoError(t, err)

					orderLimit := header.OrderLimit
					originalPieceHash := &pb.PieceHash{
						PieceId: orderLimit.PieceId,
						Hash: header.GetHash(),
						PieceSize: pieceReader.Size(),
						Timestamp: header.GetCreationTime(),
						Signature: header.GetSignature(),
					}

					newPieceHash := &pb.PieceHash{
						PieceId: m.TransferPiece.AddressedOrderLimit.Limit.PieceId,
						Hash: originalPieceHash.Hash,
						PieceSize: originalPieceHash.PieceSize,
						Timestamp: time.Now(),
					}

					receivingNodeID := nodeFullIDs[m.TransferPiece.AddressedOrderLimit.Limit.StorageNodeId]
					require.NotNil(t, receivingNodeID)
					signer := signing.SignerFromFullIdentity(receivingNodeID)

					signedNewPieceHash, err := signing.SignPieceHash(ctx, signer, newPieceHash)
					require.NoError(t, err)

					success := &pb.StorageNodeMessage{
						Message: &pb.StorageNodeMessage_Succeeded{
							Succeeded: &pb.TransferSucceeded{
								OriginalPieceId: m.TransferPiece.OriginalPieceId,
								OriginalPieceHash: originalPieceHash,
								OriginalOrderLimit: &orderLimit,
								ReplacementPieceHash: signedNewPieceHash,
							},
						},
					}
					err = processClient.Send(success)
					require.NoError(t, err)
				} else {
					failedCount++
					failed := &pb.StorageNodeMessage{
						Message: &pb.StorageNodeMessage_Failed{
							Failed: &pb.TransferFailed{
								OriginalPieceId: m.TransferPiece.OriginalPieceId,
								Error: pb.TransferFailed_UNKNOWN,
							},
						},
					}
					err = processClient.Send(failed)
					require.NoError(t, err)
				}
			case *pb.SatelliteMessage_DeletePiece:
				deletedCount++
			case *pb.SatelliteMessage_ExitCompleted:
				signee := signing.SigneeFromPeerIdentity(satellite.Identity.PeerIdentity())
				err = signing.VerifyExitCompleted(ctx, signee, m.ExitCompleted)
				require.NoError(t, err)
			default:
				require.FailNow(t, "should not reach this case: %#v", m)
			}
		}

		// check that the exit has completed and we have the correct transferred/failed values
		progress, err := satellite.DB.GracefulExit().GetProgress(ctx, exitingNode.ID())
		require.NoError(t, err)

		require.EqualValues(t, numPieces, progress.PiecesTransferred)
		require.EqualValues(t, numPieces, deletedCount)
		// even though we failed 1, it eventually succeeded, so the count should be 0
		require.EqualValues(t, 0, progress.PiecesFailed)
	})
}

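// TestConcurrentConnections verifies that only one graceful exit Process
// stream per node is served at a time: while the main connection is in use,
// concurrent calls are rejected with rpcstatus.Aborted.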
func TestConcurrentConnections(t *testing.T) {
	successThreshold := 4
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1,
		StorageNodeCount: successThreshold + 1,
		UplinkCount: 1,
		Reconfigure: testplanet.Reconfigure{
			Satellite: testplanet.ReconfigureRS(2, 3, successThreshold, successThreshold),
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		uplinkPeer := planet.Uplinks[0]
		satellite := planet.Satellites[0]

		satellite.GracefulExit.Chore.Loop.Pause()

		err := uplinkPeer.Upload(ctx, satellite, "testbucket", "test/path1", testrand.Bytes(5*memory.KiB))
		require.NoError(t, err)

		// check that there are no exiting nodes.
		exitingNodeIDs, err := satellite.DB.OverlayCache().GetExitingNodes(ctx)
		require.NoError(t, err)
		require.Len(t, exitingNodeIDs, 0)

		exitingNode, err := findNodeToExit(ctx, planet, 2)
		require.NoError(t, err)

		var group errgroup.Group
		concurrentCalls := 4

		var mainStarted sync2.Fence
		defer mainStarted.Release()

		for i := 0; i < concurrentCalls; i++ {
			group.Go(func() (err error) {
				// connect to satellite so we initiate the exit.
				conn, err := exitingNode.Dialer.DialNodeURL(ctx, satellite.NodeURL())
				require.NoError(t, err)
				defer func() {
					err = errs.Combine(err, conn.Close())
				}()

				client := pb.NewDRPCSatelliteGracefulExitClient(conn)

				if !mainStarted.Wait(ctx) {
					return ctx.Err()
				}

				c, err := client.Process(ctx)
				require.NoError(t, err)
				defer func() {
					err = errs.Combine(err, c.Close())
				}()

				_, err = c.Recv()
				require.Error(t, err)
				require.True(t, errs2.IsRPC(err, rpcstatus.Aborted))
				return nil
			})
		}

		// connect to satellite so we initiate the exit ("main" call)
		conn, err := exitingNode.Dialer.DialNodeURL(ctx, satellite.NodeURL())
		require.NoError(t, err)
		defer ctx.Check(conn.Close)

		client := pb.NewDRPCSatelliteGracefulExitClient(conn)

		{ // this connection will immediately return since graceful exit has not been initiated yet
			c, err := client.Process(ctx)
			require.NoError(t, err)
			response, err := c.Recv()
			require.NoError(t, err)
			switch m := response.GetMessage().(type) {
			case *pb.SatelliteMessage_NotReady:
			default:
				require.FailNow(t, "should not reach this case: %#v", m)
			}
			require.NoError(t, c.Close())
		}

		// wait for initial loop to start so we have pieces to transfer
		satellite.GracefulExit.Chore.Loop.TriggerWait()

		{ // this connection should not close immediately, since there are pieces to transfer
			c, err := client.Process(ctx)
			require.NoError(t, err)

			_, err = c.Recv()
			require.NoError(t, err)

			// deferring here to ensure that the other connections see the in-use connection.
			defer ctx.Check(c.Close)
		}
		// start receiving from concurrent connections
		mainStarted.Release()

		err = group.Wait()
		require.NoError(t, err)
	})
}

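// TestRecvTimeout verifies that the storage node worker fails with
// rpcstatus.DeadlineExceeded when piece reads on the exiting node are slower
// than the satellite's GracefulExit.RecvTimeout.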
func TestRecvTimeout(t *testing.T) {
	successThreshold := 4
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1,
		StorageNodeCount: successThreshold + 1,
		UplinkCount: 1,
		Reconfigure: testplanet.Reconfigure{
			StorageNodeDB: func(index int, db storagenode.DB, log *zap.Logger) (storagenode.DB, error) {
				return testblobs.NewSlowDB(log.Named("slowdb"), db), nil
			},
			Satellite: testplanet.Combine(
				func(log *zap.Logger, index int, config *satellite.Config) {
					// This config value will create a very short timeframe allowed for receiving
					// data from storage nodes. This will cause context to cancel with timeout.
					config.GracefulExit.RecvTimeout = 10 * time.Millisecond
				},
				testplanet.ReconfigureRS(2, 3, successThreshold, successThreshold),
			),
			StorageNode: func(index int, config *storagenode.Config) {
				config.GracefulExit = gracefulexit.Config{
					ChoreInterval: 2 * time.Minute,
					NumWorkers: 2,
					NumConcurrentTransfers: 2,
					MinBytesPerSecond: 128,
					MinDownloadTimeout: 2 * time.Minute,
				}
			},
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		satellite := planet.Satellites[0]
		ul := planet.Uplinks[0]

		satellite.GracefulExit.Chore.Loop.Pause()

		err := ul.Upload(ctx, satellite, "testbucket", "test/path1", testrand.Bytes(5*memory.KiB))
		require.NoError(t, err)

		exitingNode, err := findNodeToExit(ctx, planet, 1)
		require.NoError(t, err)
		exitingNode.GracefulExit.Chore.Loop.Pause()

		exitStatusReq := overlay.ExitStatusRequest{
			NodeID: exitingNode.ID(),
			ExitInitiatedAt: time.Now(),
		}
		_, err = satellite.Overlay.DB.UpdateExitStatus(ctx, &exitStatusReq)
		require.NoError(t, err)

		// run the satellite chore to build the transfer queue.
		satellite.GracefulExit.Chore.Loop.TriggerWait()
		satellite.GracefulExit.Chore.Loop.Pause()

		// check that the satellite knows the storage node is exiting.
		exitingNodes, err := satellite.DB.OverlayCache().GetExitingNodes(ctx)
		require.NoError(t, err)
		require.Len(t, exitingNodes, 1)
		require.Equal(t, exitingNode.ID(), exitingNodes[0].NodeID)

		queueItems, err := satellite.DB.GracefulExit().GetIncomplete(ctx, exitingNode.ID(), 10, 0)
		require.NoError(t, err)
		require.Len(t, queueItems, 1)

		storageNodeDB := exitingNode.DB.(*testblobs.SlowDB)
		// make uploads on storage node slower than the timeout for transferring bytes to another node
		delay := 200 * time.Millisecond
		storageNodeDB.SetLatency(delay)

		// run the SN chore again to start processing transfers.
		worker := gracefulexit.NewWorker(zaptest.NewLogger(t), exitingNode.GracefulExit.Service, exitingNode.PieceTransfer.Service, exitingNode.Dialer, satellite.NodeURL(), exitingNode.Config.GracefulExit)
		defer ctx.Check(worker.Close)

		err = worker.Run(ctx, func() {})
		require.Error(t, err)
		require.True(t, errs2.IsRPC(err, rpcstatus.DeadlineExceeded))
	})
}

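// TestInvalidStorageNodeSignature verifies that a replacement piece hash
// signed by the wrong identity (the exiting node instead of the receiving
// node) fails verification: the exit fails with VERIFICATION_FAILED and the
// node is disqualified.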
func TestInvalidStorageNodeSignature(t *testing.T) {
	testTransfers(t, 1, 0, func(t *testing.T, ctx *testcontext.Context, nodeFullIDs map[storj.NodeID]*identity.FullIdentity, satellite *testplanet.Satellite, processClient exitProcessClient, exitingNode *storagenode.Peer, numPieces int) {
		response, err := processClient.Recv()
		require.NoError(t, err)

		switch m := response.GetMessage().(type) {
		case *pb.SatelliteMessage_TransferPiece:
			require.NotNil(t, m)
			pieceReader, err := exitingNode.Storage2.Store.Reader(ctx, satellite.ID(), m.TransferPiece.OriginalPieceId)
			require.NoError(t, err)

			header, err := pieceReader.GetPieceHeader()
			require.NoError(t, err)

			orderLimit := header.OrderLimit

			originalPieceHash := &pb.PieceHash{
				PieceId: orderLimit.PieceId,
				Hash: header.GetHash(),
				PieceSize: pieceReader.Size(),
				Timestamp: header.GetCreationTime(),
				Signature: header.GetSignature(),
			}

			newPieceHash := &pb.PieceHash{
				PieceId: m.TransferPiece.AddressedOrderLimit.Limit.PieceId,
				Hash: originalPieceHash.Hash,
				PieceSize: originalPieceHash.PieceSize,
				Timestamp: time.Now(),
			}

			wrongSigner := signing.SignerFromFullIdentity(exitingNode.Identity)

			signedNewPieceHash, err := signing.SignPieceHash(ctx, wrongSigner, newPieceHash)
			require.NoError(t, err)

			message := &pb.StorageNodeMessage{
				Message: &pb.StorageNodeMessage_Succeeded{
					Succeeded: &pb.TransferSucceeded{
						OriginalPieceId: m.TransferPiece.OriginalPieceId,
						OriginalPieceHash: originalPieceHash,
						OriginalOrderLimit: &orderLimit,
						ReplacementPieceHash: signedNewPieceHash,
					},
				},
			}
			err = processClient.Send(message)
			require.NoError(t, err)
		default:
			require.FailNow(t, "should not reach this case: %#v", m)
		}

		response, err = processClient.Recv()
		require.NoError(t, err)

		switch m := response.GetMessage().(type) {
		case *pb.SatelliteMessage_ExitFailed:
			require.NotNil(t, m)
			require.NotNil(t, m.ExitFailed)
			require.Equal(t, m.ExitFailed.Reason, pb.ExitFailed_VERIFICATION_FAILED)

			node, err := satellite.DB.OverlayCache().Get(ctx, m.ExitFailed.NodeId)
			require.NoError(t, err)
			require.NotNil(t, node.Disqualified)
		default:
			require.FailNow(t, "should not reach this case: %#v", m)
		}

		// check that the exit has completed and we have the correct transferred/failed values
		progress, err := satellite.DB.GracefulExit().GetProgress(ctx, exitingNode.ID())
		require.NoError(t, err)

		require.Equal(t, int64(0), progress.PiecesTransferred)
		require.Equal(t, int64(1), progress.PiecesFailed)
	})
}

func TestExitDisqualifiedNodeFailOnStart(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1,
		StorageNodeCount: 2,
		UplinkCount: 1,
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		satellite := planet.Satellites[0]
		exitingNode := planet.StorageNodes[0]

		err := satellite.DB.OverlayCache().DisqualifyNode(ctx, exitingNode.ID())
		require.NoError(t, err)

		conn, err := exitingNode.Dialer.DialNodeURL(ctx, satellite.NodeURL())
		require.NoError(t, err)
		defer ctx.Check(conn.Close)

		client := pb.NewDRPCSatelliteGracefulExitClient(conn)
		processClient, err := client.Process(ctx)
		require.NoError(t, err)

		// Process endpoint should return immediately if node is disqualified
		response, err := processClient.Recv()
		require.True(t, errs2.IsRPC(err, rpcstatus.FailedPrecondition))
		require.Nil(t, response)

		require.NoError(t, processClient.Close())

		// disqualified node should fail graceful exit
		exitStatus, err := satellite.Overlay.DB.GetExitStatus(ctx, exitingNode.ID())
		require.NoError(t, err)
		require.NotNil(t, exitStatus.ExitFinishedAt)
		require.False(t, exitStatus.ExitSuccess)
	})
}

func TestExitDisqualifiedNodeFailEventually(t *testing.T) {
	testTransfers(t, numObjects, numMultipartObjects, func(t *testing.T, ctx *testcontext.Context, nodeFullIDs map[storj.NodeID]*identity.FullIdentity, satellite *testplanet.Satellite, processClient exitProcessClient, exitingNode *storagenode.Peer, numPieces int) {
		var disqualifiedError error
		isDisqualified := false
		for {
			response, err := processClient.Recv()
			if errs.Is(err, io.EOF) {
				// Done
				break
			}
			if errs2.IsRPC(err, rpcstatus.FailedPrecondition) {
				disqualifiedError = err
				break
			}

			if !isDisqualified {
				err := satellite.DB.OverlayCache().DisqualifyNode(ctx, exitingNode.ID())
				require.NoError(t, err)
			}

			switch m := response.GetMessage().(type) {
			case *pb.SatelliteMessage_TransferPiece:
				require.NotNil(t, m)

				pieceReader, err := exitingNode.Storage2.Store.Reader(ctx, satellite.ID(), m.TransferPiece.OriginalPieceId)
				require.NoError(t, err)

				header, err := pieceReader.GetPieceHeader()
				require.NoError(t, err)

				orderLimit := header.OrderLimit
				originalPieceHash := &pb.PieceHash{
					PieceId: orderLimit.PieceId,
					Hash: header.GetHash(),
					PieceSize: pieceReader.Size(),
					Timestamp: header.GetCreationTime(),
					Signature: header.GetSignature(),
				}

				newPieceHash := &pb.PieceHash{
					PieceId: m.TransferPiece.AddressedOrderLimit.Limit.PieceId,
					Hash: originalPieceHash.Hash,
					PieceSize: originalPieceHash.PieceSize,
					Timestamp: time.Now(),
				}

				receivingNodeID := nodeFullIDs[m.TransferPiece.AddressedOrderLimit.Limit.StorageNodeId]
				require.NotNil(t, receivingNodeID)
				signer := signing.SignerFromFullIdentity(receivingNodeID)

				signedNewPieceHash, err := signing.SignPieceHash(ctx, signer, newPieceHash)
				require.NoError(t, err)

				success := &pb.StorageNodeMessage{
					Message: &pb.StorageNodeMessage_Succeeded{
						Succeeded: &pb.TransferSucceeded{
							OriginalPieceId: m.TransferPiece.OriginalPieceId,
							OriginalPieceHash: originalPieceHash,
							OriginalOrderLimit: &orderLimit,
							ReplacementPieceHash: signedNewPieceHash,
						},
					},
				}
				err = processClient.Send(success)
				require.NoError(t, err)
			case *pb.SatelliteMessage_DeletePiece:
				continue
			default:
				require.FailNow(t, "should not reach this case: %#v", m)
			}
		}
		// check that the exit has failed because the node was disqualified
		require.True(t, errs2.IsRPC(disqualifiedError, rpcstatus.FailedPrecondition))

		// check that the exit has completed and we have the correct transferred/failed values
		progress, err := satellite.DB.GracefulExit().GetProgress(ctx, exitingNode.ID())
		require.NoError(t, err)

		require.EqualValues(t, numPieces, progress.PiecesTransferred)

		// disqualified node should fail graceful exit
		exitStatus, err := satellite.Overlay.DB.GetExitStatus(ctx, exitingNode.ID())
		require.NoError(t, err)
		require.NotNil(t, exitStatus.ExitFinishedAt)
		require.False(t, exitStatus.ExitSuccess)
	})
}

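// TestFailureHashMismatch verifies that a replacement piece hash whose hash
// does not match the original piece hash fails verification: the piece is
// counted as failed and the exit fails with VERIFICATION_FAILED.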
func TestFailureHashMismatch(t *testing.T) {
	testTransfers(t, 1, 0, testFailureHashMismatch)
}

func TestFailureHashMismatchMultipart(t *testing.T) {
	testTransfers(t, 0, 1, testFailureHashMismatch)
}

func testFailureHashMismatch(t *testing.T, ctx *testcontext.Context, nodeFullIDs map[storj.NodeID]*identity.FullIdentity, satellite *testplanet.Satellite, processClient exitProcessClient, exitingNode *storagenode.Peer, numPieces int) {
	response, err := processClient.Recv()
	require.NoError(t, err)

	switch m := response.GetMessage().(type) {
	case *pb.SatelliteMessage_TransferPiece:
		require.NotNil(t, m)
		pieceReader, err := exitingNode.Storage2.Store.Reader(ctx, satellite.ID(), m.TransferPiece.OriginalPieceId)
		require.NoError(t, err)

		header, err := pieceReader.GetPieceHeader()
		require.NoError(t, err)

		orderLimit := header.OrderLimit
		originalPieceHash := &pb.PieceHash{
			PieceId: orderLimit.PieceId,
			Hash: header.GetHash(),
			PieceSize: pieceReader.Size(),
			Timestamp: header.GetCreationTime(),
			Signature: header.GetSignature(),
		}

		newPieceHash := &pb.PieceHash{
			PieceId: m.TransferPiece.AddressedOrderLimit.Limit.PieceId,
			Hash: originalPieceHash.Hash[:1],
			PieceSize: originalPieceHash.PieceSize,
			Timestamp: time.Now(),
		}

		receivingNodeID := nodeFullIDs[m.TransferPiece.AddressedOrderLimit.Limit.StorageNodeId]
		require.NotNil(t, receivingNodeID)
		signer := signing.SignerFromFullIdentity(receivingNodeID)

		signedNewPieceHash, err := signing.SignPieceHash(ctx, signer, newPieceHash)
		require.NoError(t, err)

		message := &pb.StorageNodeMessage{
			Message: &pb.StorageNodeMessage_Succeeded{
				Succeeded: &pb.TransferSucceeded{
					OriginalPieceId: m.TransferPiece.OriginalPieceId,
					OriginalPieceHash: originalPieceHash,
					OriginalOrderLimit: &orderLimit,
					ReplacementPieceHash: signedNewPieceHash,
				},
			},
		}
		err = processClient.Send(message)
		require.NoError(t, err)
	default:
		require.FailNow(t, "should not reach this case: %#v", m)
	}

	response, err = processClient.Recv()
	require.NoError(t, err)

	switch m := response.GetMessage().(type) {
	case *pb.SatelliteMessage_ExitFailed:
		require.NotNil(t, m)
		require.NotNil(t, m.ExitFailed)
		require.Equal(t, m.ExitFailed.Reason, pb.ExitFailed_VERIFICATION_FAILED)

		node, err := satellite.DB.OverlayCache().Get(ctx, m.ExitFailed.NodeId)
		require.NoError(t, err)
		require.NotNil(t, node.Disqualified)
	default:
		require.FailNow(t, "should not reach this case: %#v", m)
	}

	// check that the exit has completed and we have the correct transferred/failed values
	progress, err := satellite.DB.GracefulExit().GetProgress(ctx, exitingNode.ID())
	require.NoError(t, err)

	require.Equal(t, int64(0), progress.PiecesTransferred)
	require.Equal(t, int64(1), progress.PiecesFailed)
}

func TestFailureUnknownError(t *testing.T) {
	testTransfers(t, 1, 0, func(t *testing.T, ctx *testcontext.Context, nodeFullIDs map[storj.NodeID]*identity.FullIdentity, satellite *testplanet.Satellite, processClient exitProcessClient, exitingNode *storagenode.Peer, numPieces int) {
		response, err := processClient.Recv()
		require.NoError(t, err)

		switch m := response.GetMessage().(type) {
		case *pb.SatelliteMessage_TransferPiece:
			require.NotNil(t, m)
			message := &pb.StorageNodeMessage{
				Message: &pb.StorageNodeMessage_Failed{
					Failed: &pb.TransferFailed{
						Error: pb.TransferFailed_UNKNOWN,
						OriginalPieceId: m.TransferPiece.OriginalPieceId,
					},
				},
			}
			err = processClient.Send(message)
			require.NoError(t, err)
		default:
			require.FailNow(t, "should not reach this case: %#v", m)
		}

		response, err = processClient.Recv()
		require.NoError(t, err)

		switch m := response.GetMessage().(type) {
		case *pb.SatelliteMessage_TransferPiece:
			require.NotNil(t, m)
		default:
			require.FailNow(t, "should not reach this case: %#v", m)
		}

		// check that the exit has completed and we have the correct transferred/failed values
		progress, err := satellite.DB.GracefulExit().GetProgress(ctx, exitingNode.ID())
		require.NoError(t, err)

		require.Equal(t, int64(0), progress.PiecesTransferred)
		require.Equal(t, int64(0), progress.PiecesFailed)
	})
}

func TestFailureUplinkSignature(t *testing.T) {
	testTransfers(t, 1, 0, func(t *testing.T, ctx *testcontext.Context, nodeFullIDs map[storj.NodeID]*identity.FullIdentity, satellite *testplanet.Satellite, processClient exitProcessClient, exitingNode *storagenode.Peer, numPieces int) {
		response, err := processClient.Recv()
		require.NoError(t, err)

		switch m := response.GetMessage().(type) {
		case *pb.SatelliteMessage_TransferPiece:
			require.NotNil(t, m)
			pieceReader, err := exitingNode.Storage2.Store.Reader(ctx, satellite.ID(), m.TransferPiece.OriginalPieceId)
			require.NoError(t, err)

			header, err := pieceReader.GetPieceHeader()
			require.NoError(t, err)

			orderLimit := header.OrderLimit
			orderLimit.UplinkPublicKey = storj.PiecePublicKey{}

			originalPieceHash := &pb.PieceHash{
				PieceId: orderLimit.PieceId,
				Hash: header.GetHash(),
				PieceSize: pieceReader.Size(),
				Timestamp: header.GetCreationTime(),
				Signature: header.GetSignature(),
			}

			newPieceHash := &pb.PieceHash{
				PieceId: m.TransferPiece.AddressedOrderLimit.Limit.PieceId,
				Hash: originalPieceHash.Hash,
				PieceSize: originalPieceHash.PieceSize,
				Timestamp: time.Now(),
			}

			receivingNodeID := nodeFullIDs[m.TransferPiece.AddressedOrderLimit.Limit.StorageNodeId]
			require.NotNil(t, receivingNodeID)
			signer := signing.SignerFromFullIdentity(receivingNodeID)

			signedNewPieceHash, err := signing.SignPieceHash(ctx, signer, newPieceHash)
			require.NoError(t, err)

			message := &pb.StorageNodeMessage{
				Message: &pb.StorageNodeMessage_Succeeded{
					Succeeded: &pb.TransferSucceeded{
						OriginalPieceId: m.TransferPiece.OriginalPieceId,
						OriginalPieceHash: originalPieceHash,
						OriginalOrderLimit: &orderLimit,
						ReplacementPieceHash: signedNewPieceHash,
					},
				},
			}
			err = processClient.Send(message)
			require.NoError(t, err)
		default:
			require.FailNow(t, "should not reach this case: %#v", m)
		}

		response, err = processClient.Recv()
		require.NoError(t, err)

		switch m := response.GetMessage().(type) {
		case *pb.SatelliteMessage_ExitFailed:
			require.NotNil(t, m)
			require.NotNil(t, m.ExitFailed)
			require.Equal(t, m.ExitFailed.Reason, pb.ExitFailed_VERIFICATION_FAILED)

			node, err := satellite.DB.OverlayCache().Get(ctx, m.ExitFailed.NodeId)
			require.NoError(t, err)
			require.NotNil(t, node.Disqualified)
		default:
			require.FailNow(t, "should not reach this case: %#v", m)
		}

		// check that the exit has completed and we have the correct transferred/failed values
		progress, err := satellite.DB.GracefulExit().GetProgress(ctx, exitingNode.ID())
		require.NoError(t, err)

		require.Equal(t, int64(0), progress.PiecesTransferred)
		require.Equal(t, int64(1), progress.PiecesFailed)
	})
}

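// TestSuccessSegmentUpdate verifies that after a successful transfer the
// satellite updates the segment so the receiving node replaces the exiting
// node in the piece list.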
func TestSuccessSegmentUpdate(t *testing.T) {
	testTransfers(t, 1, 0, testSuccessSegmentUpdate)
}

func TestSuccessSegmentUpdateMultipart(t *testing.T) {
	testTransfers(t, 0, 1, testSuccessSegmentUpdate)
}

func testSuccessSegmentUpdate(t *testing.T, ctx *testcontext.Context, nodeFullIDs map[storj.NodeID]*identity.FullIdentity, satellite *testplanet.Satellite, processClient exitProcessClient, exitingNode *storagenode.Peer, numPieces int) {
	var recNodeID storj.NodeID

	response, err := processClient.Recv()
	require.NoError(t, err)

	switch m := response.GetMessage().(type) {
	case *pb.SatelliteMessage_TransferPiece:
		require.NotNil(t, m)

		pieceReader, err := exitingNode.Storage2.Store.Reader(ctx, satellite.ID(), m.TransferPiece.OriginalPieceId)
		require.NoError(t, err)

		header, err := pieceReader.GetPieceHeader()
		require.NoError(t, err)

		orderLimit := header.OrderLimit
		originalPieceHash := &pb.PieceHash{
			PieceId: orderLimit.PieceId,
			Hash: header.GetHash(),
			PieceSize: pieceReader.Size(),
			Timestamp: header.GetCreationTime(),
			Signature: header.GetSignature(),
		}

		newPieceHash := &pb.PieceHash{
			PieceId: m.TransferPiece.AddressedOrderLimit.Limit.PieceId,
			Hash: originalPieceHash.Hash,
			PieceSize: originalPieceHash.PieceSize,
			Timestamp: time.Now(),
		}

		receivingIdentity := nodeFullIDs[m.TransferPiece.AddressedOrderLimit.Limit.StorageNodeId]
		require.NotNil(t, receivingIdentity)

		// remember the receiving node ID so we can check the updated segment later
		recNodeID = receivingIdentity.ID

		signer := signing.SignerFromFullIdentity(receivingIdentity)

		signedNewPieceHash, err := signing.SignPieceHash(ctx, signer, newPieceHash)
		require.NoError(t, err)

		success := &pb.StorageNodeMessage{
			Message: &pb.StorageNodeMessage_Succeeded{
				Succeeded: &pb.TransferSucceeded{
					OriginalPieceId: m.TransferPiece.OriginalPieceId,
					OriginalPieceHash: originalPieceHash,
					OriginalOrderLimit: &orderLimit,
					ReplacementPieceHash: signedNewPieceHash,
				},
			},
		}
		err = processClient.Send(success)
		require.NoError(t, err)
	default:
		require.FailNow(t, "did not get a TransferPiece message")
	}

	response, err = processClient.Recv()
	require.NoError(t, err)

	switch response.GetMessage().(type) {
	case *pb.SatelliteMessage_DeletePiece:
		// expect the delete piece message
	default:
		require.FailNow(t, "did not get a DeletePiece message")
	}
	// check that the exit has completed and we have the correct transferred/failed values
	progress, err := satellite.DB.GracefulExit().GetProgress(ctx, exitingNode.ID())
	require.NoError(t, err)

	require.EqualValues(t, numPieces, progress.PiecesTransferred)
	// no transfer failed, so the failed count should be 0
	require.EqualValues(t, 0, progress.PiecesFailed)

	segments, err := satellite.Metainfo.Metabase.TestingAllSegments(ctx)
	require.NoError(t, err)
	require.Len(t, segments, 1)
	found := 0
	require.True(t, len(segments[0].Pieces) > 0)
	for _, piece := range segments[0].Pieces {
		require.NotEqual(t, exitingNode.ID(), piece.StorageNode)
		if piece.StorageNode == recNodeID {
			found++
		}
	}
	require.Equal(t, 1, found)
}

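// TestUpdateSegmentFailure_DuplicatedNodeID verifies that when the segment
// already contains a piece on the chosen receiving node, the satellite keeps
// the exiting node in the segment and re-sends the transfer with a different
// receiving node instead of creating a duplicate.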
func TestUpdateSegmentFailure_DuplicatedNodeID(t *testing.T) {
	testTransfers(t, 1, 0, testUpdateSegmentFailureDuplicatedNodeID)
}

func TestUpdateSegmentFailure_DuplicatedNodeIDMultipart(t *testing.T) {
	testTransfers(t, 0, 1, testUpdateSegmentFailureDuplicatedNodeID)
}

func testUpdateSegmentFailureDuplicatedNodeID(t *testing.T, ctx *testcontext.Context, nodeFullIDs map[storj.NodeID]*identity.FullIdentity, satellite *testplanet.Satellite, processClient exitProcessClient, exitingNode *storagenode.Peer, numPieces int) {
	response, err := processClient.Recv()
	require.NoError(t, err)

	var firstRecNodeID storj.NodeID
	var pieceID storj.PieceID
	switch m := response.GetMessage().(type) {
	case *pb.SatelliteMessage_TransferPiece:
		firstRecNodeID = m.TransferPiece.AddressedOrderLimit.Limit.StorageNodeId
		pieceID = m.TransferPiece.OriginalPieceId

		pieceReader, err := exitingNode.Storage2.Store.Reader(ctx, satellite.ID(), pieceID)
		require.NoError(t, err)

		header, err := pieceReader.GetPieceHeader()
		require.NoError(t, err)

		orderLimit := header.OrderLimit
		originalPieceHash := &pb.PieceHash{
			PieceId: orderLimit.PieceId,
			Hash: header.GetHash(),
			PieceSize: pieceReader.Size(),
			Timestamp: header.GetCreationTime(),
			Signature: header.GetSignature(),
		}

		newPieceHash := &pb.PieceHash{
			PieceId: m.TransferPiece.AddressedOrderLimit.Limit.PieceId,
			Hash: originalPieceHash.Hash,
			PieceSize: originalPieceHash.PieceSize,
			Timestamp: time.Now(),
		}

		receivingNodeIdentity := nodeFullIDs[m.TransferPiece.AddressedOrderLimit.Limit.StorageNodeId]
		require.NotNil(t, receivingNodeIdentity)
		signer := signing.SignerFromFullIdentity(receivingNodeIdentity)

		signedNewPieceHash, err := signing.SignPieceHash(ctx, signer, newPieceHash)
		require.NoError(t, err)

		success := &pb.StorageNodeMessage{
			Message: &pb.StorageNodeMessage_Succeeded{
				Succeeded: &pb.TransferSucceeded{
					OriginalPieceId: pieceID,
					OriginalPieceHash: originalPieceHash,
					OriginalOrderLimit: &orderLimit,
					ReplacementPieceHash: signedNewPieceHash,
				},
			},
		}

		// update segment to include the new receiving node before responding to satellite
		segments, err := satellite.Metainfo.Metabase.TestingAllSegments(ctx)
		require.NoError(t, err)
		require.Len(t, segments, 1)
		require.True(t, len(segments[0].Pieces) > 0)

		pieceToRemove := make(metabase.Pieces, 1)
		pieceToAdd := make(metabase.Pieces, 1)
		pieces := segments[0].Pieces

		for _, piece := range pieces {
			if pieceToRemove[0] == (metabase.Piece{}) && piece.StorageNode != exitingNode.ID() {
				pieceToRemove[0] = piece
				continue
			}
		}

		pieceToAdd[0] = metabase.Piece{
			Number: pieceToRemove[0].Number,
			StorageNode: firstRecNodeID,
		}

		err = satellite.GracefulExit.Endpoint.UpdatePiecesCheckDuplicates(ctx, segments[0], pieceToAdd, pieceToRemove, false)
		require.NoError(t, err)

		err = processClient.Send(success)
		require.NoError(t, err)
	default:
		require.FailNow(t, "should not reach this case: %#v", m)
	}

	response, err = processClient.Recv()
	require.NoError(t, err)

	switch m := response.GetMessage().(type) {
	case *pb.SatelliteMessage_TransferPiece:
		// validate we get a new node to transfer to
		require.True(t, m.TransferPiece.OriginalPieceId == pieceID)
		require.True(t, m.TransferPiece.AddressedOrderLimit.Limit.StorageNodeId != firstRecNodeID)
	default:
		require.FailNow(t, "should not reach this case: %#v", m)
	}

	// check exiting node is still in the segment
	segments, err := satellite.Metainfo.Metabase.TestingAllSegments(ctx)
	require.NoError(t, err)
	require.Len(t, segments, 1)

	require.True(t, len(segments[0].Pieces) > 0)

	pieces := segments[0].Pieces
	pieceMap := make(map[storj.NodeID]int)
	for _, piece := range pieces {
		pieceMap[piece.StorageNode]++
	}

	exitingNodeID := exitingNode.ID()
	count, ok := pieceMap[exitingNodeID]
	require.True(t, ok)
	require.Equal(t, 1, count)
	count, ok = pieceMap[firstRecNodeID]
	require.True(t, ok)
	require.Equal(t, 1, count)
}

func TestExitDisabled(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1,
		StorageNodeCount: 2,
		UplinkCount: 1,
		Reconfigure: testplanet.Reconfigure{
			Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
				config.GracefulExit.Enabled = false
			},
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		satellite := planet.Satellites[0]
		exitingNode := planet.StorageNodes[0]

		require.Nil(t, satellite.GracefulExit.Chore)
		require.Nil(t, satellite.GracefulExit.Endpoint)

		conn, err := exitingNode.Dialer.DialNodeURL(ctx, satellite.NodeURL())
		require.NoError(t, err)
		defer ctx.Check(conn.Close)

		client := pb.NewDRPCSatelliteGracefulExitClient(conn)
		processClient, err := client.Process(ctx)
		require.NoError(t, err)

		// Process endpoint should return immediately if GE is disabled
		response, err := processClient.Recv()
		require.Error(t, err)
		// drpc will return "Unknown"
		require.True(t, errs2.IsRPC(err, rpcstatus.Unknown))
		require.Nil(t, response)
	})
}

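// TestSegmentChangedOrDeleted verifies that queued transfers whose segments
// were overwritten or deleted after the queue was built are skipped, so the
// exit completes successfully without transferring anything.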
func TestSegmentChangedOrDeleted(t *testing.T) {
	successThreshold := 4
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1,
		StorageNodeCount: successThreshold + 1,
		UplinkCount: 1,
		Reconfigure: testplanet.Reconfigure{
			Satellite: testplanet.ReconfigureRS(2, 3, successThreshold, successThreshold),
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		uplinkPeer := planet.Uplinks[0]
		satellite := planet.Satellites[0]

		satellite.GracefulExit.Chore.Loop.Pause()

		err := uplinkPeer.Upload(ctx, satellite, "testbucket", "test/path0", testrand.Bytes(5*memory.KiB))
		require.NoError(t, err)
		err = uplinkPeer.Upload(ctx, satellite, "testbucket", "test/path1", testrand.Bytes(5*memory.KiB))
		require.NoError(t, err)

		// check that there are no exiting nodes.
		exitingNodes, err := satellite.DB.OverlayCache().GetExitingNodes(ctx)
		require.NoError(t, err)
		require.Len(t, exitingNodes, 0)

		exitingNode, err := findNodeToExit(ctx, planet, 2)
		require.NoError(t, err)

		exitRequest := &overlay.ExitStatusRequest{
			NodeID: exitingNode.ID(),
			ExitInitiatedAt: time.Now(),
		}

		_, err = satellite.DB.OverlayCache().UpdateExitStatus(ctx, exitRequest)
		require.NoError(t, err)
		err = satellite.DB.GracefulExit().IncrementProgress(ctx, exitingNode.ID(), 0, 0, 0)
		require.NoError(t, err)

		exitingNodes, err = satellite.DB.OverlayCache().GetExitingNodes(ctx)
		require.NoError(t, err)
		require.Len(t, exitingNodes, 1)
		require.Equal(t, exitingNode.ID(), exitingNodes[0].NodeID)

		// trigger the metainfo loop chore so we can get some pieces to transfer
		satellite.GracefulExit.Chore.Loop.TriggerWait()

		// make sure all the pieces are in the transfer queue
		incomplete, err := satellite.DB.GracefulExit().GetIncomplete(ctx, exitingNode.ID(), 10, 0)
		require.NoError(t, err)
		require.Len(t, incomplete, 2)

		// updating the first object and deleting the second. this will cause a root piece ID change which will result in
		// a successful graceful exit instead of a request to transfer pieces since the root piece IDs will have changed.
		err = uplinkPeer.Upload(ctx, satellite, "testbucket", "test/path0", testrand.Bytes(5*memory.KiB))
		require.NoError(t, err)
		err = uplinkPeer.DeleteObject(ctx, satellite, "testbucket", "test/path1")
		require.NoError(t, err)

		// reconnect to the satellite.
		conn, err := exitingNode.Dialer.DialNodeURL(ctx, satellite.NodeURL())
		require.NoError(t, err)
		defer ctx.Check(conn.Close)

		client := pb.NewDRPCSatelliteGracefulExitClient(conn)

		c, err := client.Process(ctx)
		require.NoError(t, err)
		defer ctx.Check(c.CloseSend)

		response, err := c.Recv()
		require.NoError(t, err)

		// we expect an exit completed b/c there is nothing to do here
		switch m := response.GetMessage().(type) {
		case *pb.SatelliteMessage_ExitCompleted:
			signee := signing.SigneeFromPeerIdentity(satellite.Identity.PeerIdentity())
			err = signing.VerifyExitCompleted(ctx, signee, m.ExitCompleted)
			require.NoError(t, err)

			exitStatus, err := satellite.DB.OverlayCache().GetExitStatus(ctx, exitingNode.ID())
			require.NoError(t, err)
			require.NotNil(t, exitStatus.ExitFinishedAt)
			require.True(t, exitStatus.ExitSuccess)
		default:
			require.FailNow(t, "should not reach this case: %#v", m)
		}

		queueItems, err := satellite.DB.GracefulExit().GetIncomplete(ctx, exitingNode.ID(), 2, 0)
		require.NoError(t, err)
		require.Len(t, queueItems, 0)
	})
}

func TestSegmentChangedOrDeletedMultipart(t *testing.T) {
	successThreshold := 4
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1,
		StorageNodeCount: successThreshold + 1,
		UplinkCount: 1,
		Reconfigure: testplanet.Reconfigure{
			Satellite: testplanet.ReconfigureRS(2, 3, successThreshold, successThreshold),
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		uplinkPeer := planet.Uplinks[0]
		satellite := planet.Satellites[0]

		project, err := uplinkPeer.GetProject(ctx, satellite)
		require.NoError(t, err)
		defer func() { require.NoError(t, project.Close()) }()

		satellite.GracefulExit.Chore.Loop.Pause()

		_, err = project.EnsureBucket(ctx, "testbucket")
		require.NoError(t, err)

		// TODO: activate when an object part can be overwritten
		// info0, err := multipart.NewMultipartUpload(ctx, project, "testbucket", "test/path0", nil)
		// require.NoError(t, err)
		// _, err = multipart.PutObjectPart(ctx, project, "testbucket", "test/path0", info0.StreamID, 1,
		// 	etag.NewHashReader(bytes.NewReader(testrand.Bytes(5*memory.KiB)), sha256.New()))
		// require.NoError(t, err)

		info1, err := project.BeginUpload(ctx, "testbucket", "test/path1", nil)
		require.NoError(t, err)

		upload, err := project.UploadPart(ctx, "testbucket", "test/path1", info1.UploadID, 1)
		require.NoError(t, err)
		_, err = upload.Write(testrand.Bytes(5 * memory.KiB))
		require.NoError(t, err)
		require.NoError(t, upload.Commit())

		// check that there are no exiting nodes.
		exitingNodes, err := satellite.DB.OverlayCache().GetExitingNodes(ctx)
		require.NoError(t, err)
		require.Len(t, exitingNodes, 0)

		exitingNode, err := findNodeToExit(ctx, planet, 2)
		require.NoError(t, err)

		exitRequest := &overlay.ExitStatusRequest{
			NodeID: exitingNode.ID(),
			ExitInitiatedAt: time.Now(),
		}

		_, err = satellite.DB.OverlayCache().UpdateExitStatus(ctx, exitRequest)
		require.NoError(t, err)
		err = satellite.DB.GracefulExit().IncrementProgress(ctx, exitingNode.ID(), 0, 0, 0)
		require.NoError(t, err)

		exitingNodes, err = satellite.DB.OverlayCache().GetExitingNodes(ctx)
		require.NoError(t, err)
		require.Len(t, exitingNodes, 1)
		require.Equal(t, exitingNode.ID(), exitingNodes[0].NodeID)

		// trigger the metainfo loop chore so we can get some pieces to transfer
		satellite.GracefulExit.Chore.Loop.TriggerWait()

		// make sure all the pieces are in the transfer queue
		incomplete, err := satellite.DB.GracefulExit().GetIncomplete(ctx, exitingNode.ID(), 10, 0)
		require.NoError(t, err)
		require.Len(t, incomplete, 1)
		// TODO: change to this when an object part can be overwritten
		// require.Len(t, incomplete, 2)

		// updating the first object and deleting the second. this will cause a root piece ID change which will result in
		// a successful graceful exit instead of a request to transfer pieces since the root piece IDs will have changed.
		// TODO: activate when an object part can be overwritten
		// _, err = multipart.PutObjectPart(ctx, project, "testbucket", "test/path0", info0.StreamID, 1, bytes.NewReader(testrand.Bytes(5*memory.KiB)))
		// require.NoError(t, err)
		err = project.AbortUpload(ctx, "testbucket", "test/path1", info1.UploadID)
		require.NoError(t, err)

		// reconnect to the satellite.
		conn, err := exitingNode.Dialer.DialNodeURL(ctx, satellite.NodeURL())
		require.NoError(t, err)
		defer ctx.Check(conn.Close)

		client := pb.NewDRPCSatelliteGracefulExitClient(conn)

		c, err := client.Process(ctx)
		require.NoError(t, err)
		defer ctx.Check(c.CloseSend)

		response, err := c.Recv()
		require.NoError(t, err)

		// we expect an exit completed b/c there is nothing to do here
		switch m := response.GetMessage().(type) {
		case *pb.SatelliteMessage_ExitCompleted:
			signee := signing.SigneeFromPeerIdentity(satellite.Identity.PeerIdentity())
			err = signing.VerifyExitCompleted(ctx, signee, m.ExitCompleted)
			require.NoError(t, err)

			exitStatus, err := satellite.DB.OverlayCache().GetExitStatus(ctx, exitingNode.ID())
			require.NoError(t, err)
			require.NotNil(t, exitStatus.ExitFinishedAt)
			require.True(t, exitStatus.ExitSuccess)
		default:
			require.FailNow(t, "should not reach this case: %#v", m)
		}

		queueItems, err := satellite.DB.GracefulExit().GetIncomplete(ctx, exitingNode.ID(), 2, 0)
		require.NoError(t, err)
		require.Len(t, queueItems, 0)
	})
}

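// TestFailureNotFound verifies that reporting a piece as NOT_FOUND removes
// the exiting node from the segment, counts the piece as failed, and fails
// the exit with OVERALL_FAILURE_PERCENTAGE_EXCEEDED.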
func TestFailureNotFound(t *testing.T) {
	testTransfers(t, 1, 0, func(t *testing.T, ctx *testcontext.Context, nodeFullIDs map[storj.NodeID]*identity.FullIdentity, satellite *testplanet.Satellite, processClient exitProcessClient, exitingNode *storagenode.Peer, numPieces int) {
		response, err := processClient.Recv()
		require.NoError(t, err)

		switch m := response.GetMessage().(type) {
		case *pb.SatelliteMessage_TransferPiece:
			require.NotNil(t, m)

			message := &pb.StorageNodeMessage{
				Message: &pb.StorageNodeMessage_Failed{
					Failed: &pb.TransferFailed{
						OriginalPieceId: m.TransferPiece.OriginalPieceId,
						Error: pb.TransferFailed_NOT_FOUND,
					},
				},
			}
			err = processClient.Send(message)
			require.NoError(t, err)
		default:
			require.FailNow(t, "should not reach this case: %#v", m)
		}

		response, err = processClient.Recv()
		require.NoError(t, err)

		switch m := response.GetMessage().(type) {
		case *pb.SatelliteMessage_ExitFailed:
			require.NotNil(t, m)
			require.NotNil(t, m.ExitFailed)
			require.Equal(t, m.ExitFailed.Reason, pb.ExitFailed_OVERALL_FAILURE_PERCENTAGE_EXCEEDED)

			node, err := satellite.DB.OverlayCache().Get(ctx, m.ExitFailed.NodeId)
			require.NoError(t, err)
			require.NotNil(t, node.Disqualified)
		default:
			require.FailNow(t, "should not reach this case: %#v", m)
		}

		// check that node is no longer in the segment
		segments, err := satellite.Metainfo.Metabase.TestingAllSegments(ctx)
		require.NoError(t, err)
		require.Len(t, segments, 1)

		for _, piece := range segments[0].Pieces {
			require.NotEqual(t, piece.StorageNode, exitingNode.ID())
		}

		// check that the exit has completed and we have the correct transferred/failed values
		progress, err := satellite.DB.GracefulExit().GetProgress(ctx, exitingNode.ID())
		require.NoError(t, err)

		require.Equal(t, int64(0), progress.PiecesTransferred)
		require.Equal(t, int64(1), progress.PiecesFailed)
	})
}

|
|
|
|
|
2019-11-13 14:54:50 +00:00
|
|
|
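// TestFailureStorageNodeIgnoresTransferMessages verifies that when the exiting
// node keeps answering TransferPiece messages with an unrecognized message, the
// satellite re-sends the order limit only MaxOrderLimitSendCount times before
// marking the piece as failed, dropping it from the queue, and finishing the exit.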
func TestFailureStorageNodeIgnoresTransferMessages(t *testing.T) {
	var maxOrderLimitSendCount = 3
	testplanet.Run(t, testplanet.Config{
		SatelliteCount:   1,
		StorageNodeCount: 5,
		UplinkCount:      1,
		Reconfigure: testplanet.Reconfigure{
			Satellite: testplanet.Combine(
				func(log *zap.Logger, index int, config *satellite.Config) {
					// We don't care whether a node gracefully exits or not in this test,
					// so we set the max failures percentage extra high.
					config.GracefulExit.OverallMaxFailuresPercentage = 101
					config.GracefulExit.MaxOrderLimitSendCount = maxOrderLimitSendCount
				},
				testplanet.ReconfigureRS(2, 3, 4, 4),
			),
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		uplinkPeer := planet.Uplinks[0]
		satellite := planet.Satellites[0]

		satellite.GracefulExit.Chore.Loop.Pause()

		nodeFullIDs := make(map[storj.NodeID]*identity.FullIdentity)
		for _, node := range planet.StorageNodes {
			nodeFullIDs[node.ID()] = node.Identity
		}

		err := uplinkPeer.Upload(ctx, satellite, "testbucket", "test/path", testrand.Bytes(5*memory.KiB))
		require.NoError(t, err)

		// check that there are no exiting nodes.
		exitingNodes, err := satellite.DB.OverlayCache().GetExitingNodes(ctx)
		require.NoError(t, err)
		require.Len(t, exitingNodes, 0)

		exitingNode, err := findNodeToExit(ctx, planet, 1)
		require.NoError(t, err)

		// connect to satellite so we initiate the exit.
		conn, err := exitingNode.Dialer.DialNodeURL(ctx, satellite.NodeURL())
		require.NoError(t, err)
		defer ctx.Check(conn.Close)

		client := pb.NewDRPCSatelliteGracefulExitClient(conn)

		c, err := client.Process(ctx)
		require.NoError(t, err)

		response, err := c.Recv()
		require.NoError(t, err)

		// should get a NotReady since the metainfo loop would not be finished at this point.
		switch m := response.GetMessage().(type) {
		case *pb.SatelliteMessage_NotReady:
			// now check that the exiting node is initiated.
			exitingNodes, err := satellite.DB.OverlayCache().GetExitingNodes(ctx)
			require.NoError(t, err)
			require.Len(t, exitingNodes, 1)

			require.Equal(t, exitingNode.ID(), exitingNodes[0].NodeID)
		default:
			require.FailNow(t, "should not reach this case: %#v", m)
		}
		// close the old client
		require.NoError(t, c.CloseSend())

		// trigger the metainfo loop chore so we can get some pieces to transfer
		satellite.GracefulExit.Chore.Loop.TriggerWait()

		// make sure all the pieces are in the transfer queue
		_, err = satellite.DB.GracefulExit().GetIncomplete(ctx, exitingNode.ID(), 1, 0)
		require.NoError(t, err)

		var messageCount int

		// We need to label this outer loop so that we're able to exit it from the inner loop.
		// The outer loop is for sending the request from node to satellite multiple times.
		// The inner loop is for reading the response.
	MessageLoop:
		for {
			var unknownMsgSent bool
			c, err := client.Process(ctx)
			require.NoError(t, err)

			for {
				response, err := c.Recv()
				if unknownMsgSent {
					require.Error(t, err)
					break
				} else {
					require.NoError(t, err)
				}

				switch m := response.GetMessage().(type) {
				case *pb.SatelliteMessage_ExitCompleted:
					break MessageLoop
				case *pb.SatelliteMessage_TransferPiece:
					messageCount++
					unknownMsgSent = true
					// We send an unknown message because we want to fail the
					// transfer message request we get from the satellite.
					// This allows us to keep the conn open but repopulate
					// the pending queue.
					err = c.Send(&pb.StorageNodeMessage{})
					require.NoError(t, err)
					require.NoError(t, c.CloseSend())
				default:
					require.FailNow(t, "should not reach this case: %#v", m)
				}
			}
		}
		require.Equal(t, messageCount, maxOrderLimitSendCount)

		// make sure the piece the node never responded to is no longer in the queue
		incompletes, err := satellite.DB.GracefulExit().GetIncomplete(ctx, exitingNode.ID(), 10, 0)
		require.NoError(t, err)
		require.Len(t, incompletes, 0)

		// check that the exit has completed and we have the correct transferred/failed values
		progress, err := satellite.DB.GracefulExit().GetProgress(ctx, exitingNode.ID())
		require.NoError(t, err)
		require.EqualValues(t, 1, progress.PiecesFailed)
		status, err := satellite.DB.OverlayCache().GetExitStatus(ctx, exitingNode.ID())
		require.NoError(t, err)
		require.NotNil(t, status.ExitFinishedAt)
	})
}

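// TestIneligibleNodeAge verifies that a node younger than the configured
// NodeMinAgeInMonths cannot initiate graceful exit: the Process stream fails
// with FailedPrecondition and no exiting node is recorded.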
func TestIneligibleNodeAge(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount:   1,
		StorageNodeCount: 5,
		UplinkCount:      1,
		Reconfigure: testplanet.Reconfigure{
			Satellite: testplanet.Combine(
				func(log *zap.Logger, index int, config *satellite.Config) {
					// Set the required node age to 1 month.
					config.GracefulExit.NodeMinAgeInMonths = 1
				},
				testplanet.ReconfigureRS(2, 3, 4, 4),
			),
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		uplinkPeer := planet.Uplinks[0]
		satellite := planet.Satellites[0]

		satellite.GracefulExit.Chore.Loop.Pause()

		nodeFullIDs := make(map[storj.NodeID]*identity.FullIdentity)
		for _, node := range planet.StorageNodes {
			nodeFullIDs[node.ID()] = node.Identity
		}

		err := uplinkPeer.Upload(ctx, satellite, "testbucket", "test/path", testrand.Bytes(5*memory.KiB))
		require.NoError(t, err)

		// check that there are no exiting nodes.
		exitingNodes, err := satellite.DB.OverlayCache().GetExitingNodes(ctx)
		require.NoError(t, err)
		require.Len(t, exitingNodes, 0)

		exitingNode, err := findNodeToExit(ctx, planet, 1)
		require.NoError(t, err)

		// connect to satellite so we initiate the exit.
		conn, err := exitingNode.Dialer.DialNodeURL(ctx, satellite.NodeURL())
		require.NoError(t, err)
		defer ctx.Check(conn.Close)

		client := pb.NewDRPCSatelliteGracefulExitClient(conn)

		c, err := client.Process(ctx)
		require.NoError(t, err)

		_, err = c.Recv()
		// expect the node ineligible error here
		require.Error(t, err)
		require.True(t, errs2.IsRPC(err, rpcstatus.FailedPrecondition))

		// check that there are still no exiting nodes
		exitingNodes, err = satellite.DB.OverlayCache().GetExitingNodes(ctx)
		require.NoError(t, err)
		require.Len(t, exitingNodes, 0)

		// close the old client
		require.NoError(t, c.CloseSend())
	})
}

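// testTransfers spins up a test planet, uploads `objects` regular objects and
// `multipartObjects` pending multipart uploads, initiates graceful exit on the
// node holding the most pieces, and then hands the open Process stream to
// verifier so each test can drive the transfer protocol itself.
//
// Illustrative usage only (the verifier body sketched below is hypothetical;
// real callers implement their own checks against the satellite messages):
//
//	testTransfers(t, 1, 0, func(t *testing.T, ctx *testcontext.Context,
//		nodeFullIDs map[storj.NodeID]*identity.FullIdentity, satellite *testplanet.Satellite,
//		processClient exitProcessClient, exitingNode *storagenode.Peer, numPieces int) {
//		// read satellite messages from processClient and respond per test scenario
//	})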
func testTransfers(t *testing.T, objects int, multipartObjects int, verifier func(t *testing.T, ctx *testcontext.Context, nodeFullIDs map[storj.NodeID]*identity.FullIdentity, satellite *testplanet.Satellite, processClient exitProcessClient, exitingNode *storagenode.Peer, numPieces int)) {
	const successThreshold = 4
	testplanet.Run(t, testplanet.Config{
		SatelliteCount:   1,
		StorageNodeCount: successThreshold + 1,
		UplinkCount:      1,
		Reconfigure: testplanet.Reconfigure{
			Satellite: testplanet.ReconfigureRS(2, 3, successThreshold, successThreshold),
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		uplinkPeer := planet.Uplinks[0]
		satellite := planet.Satellites[0]

		project, err := uplinkPeer.GetProject(ctx, satellite)
		require.NoError(t, err)
		defer func() { require.NoError(t, project.Close()) }()

		_, err = project.EnsureBucket(ctx, "testbucket")
		require.NoError(t, err)

		satellite.GracefulExit.Chore.Loop.Pause()

		nodeFullIDs := make(map[storj.NodeID]*identity.FullIdentity)
		for _, node := range planet.StorageNodes {
			nodeFullIDs[node.ID()] = node.Identity
		}

		for i := 0; i < objects; i++ {
			err := uplinkPeer.Upload(ctx, satellite, "testbucket", "test/path"+strconv.Itoa(i), testrand.Bytes(5*memory.KiB))
			require.NoError(t, err)
		}

		for i := 0; i < multipartObjects; i++ {
			objectName := "test/multipart" + strconv.Itoa(i)

			info, err := project.BeginUpload(ctx, "testbucket", objectName, nil)
			require.NoError(t, err)

			upload, err := project.UploadPart(ctx, "testbucket", objectName, info.UploadID, 1)
			require.NoError(t, err)
			_, err = upload.Write(testrand.Bytes(5 * memory.KiB))
			require.NoError(t, err)
			require.NoError(t, upload.Commit())
		}

		// check that there are no exiting nodes.
		exitingNodes, err := satellite.DB.OverlayCache().GetExitingNodes(ctx)
		require.NoError(t, err)
		require.Len(t, exitingNodes, 0)

		exitingNode, err := findNodeToExit(ctx, planet, objects)
		require.NoError(t, err)

		// connect to satellite so we initiate the exit.
		conn, err := exitingNode.Dialer.DialNodeURL(ctx, satellite.NodeURL())
		require.NoError(t, err)
		defer ctx.Check(conn.Close)

		client := pb.NewDRPCSatelliteGracefulExitClient(conn)

		c, err := client.Process(ctx)
		require.NoError(t, err)

		response, err := c.Recv()
		require.NoError(t, err)

		// should get a NotReady since the metainfo loop would not be finished at this point.
		switch m := response.GetMessage().(type) {
		case *pb.SatelliteMessage_NotReady:
			// now check that the exiting node is initiated.
			exitingNodes, err := satellite.DB.OverlayCache().GetExitingNodes(ctx)
			require.NoError(t, err)
			require.Len(t, exitingNodes, 1)

			require.Equal(t, exitingNode.ID(), exitingNodes[0].NodeID)
		default:
			require.FailNow(t, "should not reach this case: %#v", m)
		}
		// close the old client
		require.NoError(t, c.CloseSend())

		// trigger the metainfo loop chore so we can get some pieces to transfer
		satellite.GracefulExit.Chore.Loop.TriggerWait()

		// make sure all the pieces are in the transfer queue
		incompleteTransfers, err := satellite.DB.GracefulExit().GetIncomplete(ctx, exitingNode.ID(), objects+multipartObjects, 0)
		require.NoError(t, err)

		// connect to satellite again to start receiving transfers
		c, err = client.Process(ctx)
		require.NoError(t, err)
		defer ctx.Check(c.CloseSend)

		verifier(t, ctx, nodeFullIDs, satellite, c, exitingNode.Peer, len(incompleteTransfers))
	})
}

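// findNodeToExit returns the storage node that currently holds the most pieces,
// which the tests above use as the node that initiates graceful exit.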
func findNodeToExit(ctx context.Context, planet *testplanet.Planet, objects int) (*testplanet.StorageNode, error) {
	satellite := planet.Satellites[0]

	pieceCountMap := make(map[storj.NodeID]int, len(planet.StorageNodes))
	for _, node := range planet.StorageNodes {
		pieceCountMap[node.ID()] = 0
	}

	segments, err := satellite.Metainfo.Metabase.TestingAllSegments(ctx)
	if err != nil {
		return nil, err
	}
	for _, segment := range segments {
		for _, piece := range segment.Pieces {
			pieceCountMap[piece.StorageNode]++
		}
	}

	var exitingNodeID storj.NodeID
	maxCount := 0
	for k, v := range pieceCountMap {
		if exitingNodeID.IsZero() {
			exitingNodeID = k
			maxCount = v
			continue
		}
		if v > maxCount {
			exitingNodeID = k
			maxCount = v
		}
	}

	return planet.FindNode(exitingNodeID), nil
}

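// TestUpdatePiecesCheckDuplicates verifies that UpdatePiecesCheckDuplicates
// rejects a piece update that would leave two pieces of the same segment on a
// single storage node.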
func TestUpdatePiecesCheckDuplicates(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1, StorageNodeCount: 3, UplinkCount: 1,
		Reconfigure: testplanet.Reconfigure{
			Satellite: testplanet.ReconfigureRS(1, 1, 3, 3),
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		satellite := planet.Satellites[0]
		uplinkPeer := planet.Uplinks[0]
		path := "test/path"

		err := uplinkPeer.Upload(ctx, satellite, "test1", path, testrand.Bytes(5*memory.KiB))
		require.NoError(t, err)

		segments, err := satellite.Metainfo.Metabase.TestingAllSegments(ctx)
		require.NoError(t, err)
		require.Len(t, segments, 1)

		pieces := segments[0].Pieces
		require.False(t, hasDuplicates(pieces))

		// Remove the second piece in the list and replace it with
		// a piece on the first node.
		// This way we can ensure that we use a valid piece num.
		removePiece := metabase.Piece{
			Number:      pieces[1].Number,
			StorageNode: pieces[1].StorageNode,
		}
		addPiece := metabase.Piece{
			Number:      pieces[1].Number,
			StorageNode: pieces[0].StorageNode,
		}

		// with the duplicates check enabled, adding a piece for a node that
		// already holds a piece of this segment must be rejected.
		err = satellite.GracefulExit.Endpoint.UpdatePiecesCheckDuplicates(ctx, segments[0], metabase.Pieces{addPiece}, metabase.Pieces{removePiece}, true)
		require.True(t, metainfo.ErrNodeAlreadyExists.Has(err))
	})
}

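// hasDuplicates reports whether any storage node appears more than once in pieces.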
func hasDuplicates(pieces metabase.Pieces) bool {
	nodePieceCounts := make(map[storj.NodeID]int)
	for _, piece := range pieces {
		nodePieceCounts[piece.StorageNode]++
	}

	for _, count := range nodePieceCounts {
		if count > 1 {
			return true
		}
	}

	return false
}