2019-10-15 16:29:47 +01:00
|
|
|
// Copyright (C) 2019 Storj Labs, Inc.
|
|
|
|
// See LICENSE for copying information.
|
|
|
|
|
|
|
|
package gracefulexit_test
|
|
|
|
|
|
|
|
import (
|
2019-10-22 21:42:21 +01:00
|
|
|
"context"
|
2019-10-15 16:29:47 +01:00
|
|
|
"testing"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/stretchr/testify/require"
|
2019-10-22 21:42:21 +01:00
|
|
|
"github.com/zeebo/errs"
|
2019-10-15 16:29:47 +01:00
|
|
|
|
2019-12-27 11:48:47 +00:00
|
|
|
"storj.io/common/memory"
|
|
|
|
"storj.io/common/storj"
|
|
|
|
"storj.io/common/testcontext"
|
|
|
|
"storj.io/common/testrand"
|
2019-11-14 19:46:15 +00:00
|
|
|
"storj.io/storj/private/testplanet"
|
2019-10-15 16:29:47 +01:00
|
|
|
"storj.io/storj/satellite/overlay"
|
2023-04-05 18:03:06 +01:00
|
|
|
"storj.io/storj/storagenode/blobstore"
|
2019-10-15 16:29:47 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
func TestChore(t *testing.T) {
|
2020-01-21 10:38:41 +00:00
|
|
|
const successThreshold = 4
|
2019-10-15 16:29:47 +01:00
|
|
|
testplanet.Run(t, testplanet.Config{
|
|
|
|
SatelliteCount: 1,
|
2019-10-31 14:57:54 +00:00
|
|
|
StorageNodeCount: successThreshold + 2,
|
2019-10-15 16:29:47 +01:00
|
|
|
UplinkCount: 1,
|
2020-01-21 10:38:41 +00:00
|
|
|
Reconfigure: testplanet.Reconfigure{
|
|
|
|
Satellite: testplanet.ReconfigureRS(2, 3, successThreshold, successThreshold),
|
|
|
|
},
|
2019-10-15 16:29:47 +01:00
|
|
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
|
|
|
satellite1 := planet.Satellites[0]
|
2019-10-22 21:42:21 +01:00
|
|
|
uplinkPeer := planet.Uplinks[0]
|
2019-10-15 16:29:47 +01:00
|
|
|
|
2020-01-21 10:38:41 +00:00
|
|
|
err := uplinkPeer.Upload(ctx, satellite1, "testbucket", "test/path1", testrand.Bytes(5*memory.KiB))
|
2019-10-15 16:29:47 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
2020-12-16 16:47:31 +00:00
|
|
|
exitingNode, err := findNodeToExit(ctx, planet)
|
2019-10-15 16:29:47 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
2019-10-22 21:42:21 +01:00
|
|
|
nodePieceCounts, err := getNodePieceCounts(ctx, planet)
|
2019-10-15 16:29:47 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
2019-10-22 21:42:21 +01:00
|
|
|
exitSatellite(ctx, t, planet, exitingNode)
|
2019-10-15 16:29:47 +01:00
|
|
|
|
2019-10-22 21:42:21 +01:00
|
|
|
newNodePieceCounts, err := getNodePieceCounts(ctx, planet)
|
2019-10-15 16:29:47 +01:00
|
|
|
require.NoError(t, err)
|
2019-10-22 21:42:21 +01:00
|
|
|
var newExitingNodeID storj.NodeID
|
|
|
|
for k, v := range newNodePieceCounts {
|
|
|
|
if v > nodePieceCounts[k] {
|
|
|
|
newExitingNodeID = k
|
|
|
|
}
|
|
|
|
}
|
|
|
|
require.NotNil(t, newExitingNodeID)
|
|
|
|
require.NotEqual(t, exitingNode.ID(), newExitingNodeID)
|
2019-10-15 16:29:47 +01:00
|
|
|
|
2020-03-27 14:46:40 +00:00
|
|
|
newExitingNode := planet.FindNode(newExitingNodeID)
|
2019-10-22 21:42:21 +01:00
|
|
|
require.NotNil(t, newExitingNode)
|
|
|
|
|
2019-10-30 14:46:56 +00:00
|
|
|
exitSatellite(ctx, t, planet, newExitingNode)
|
2019-10-15 16:29:47 +01:00
|
|
|
})
|
|
|
|
}
|
2019-10-22 21:42:21 +01:00
|
|
|
|
2020-03-27 14:46:40 +00:00
|
|
|
func exitSatellite(ctx context.Context, t *testing.T, planet *testplanet.Planet, exitingNode *testplanet.StorageNode) {
|
2019-10-22 21:42:21 +01:00
|
|
|
satellite1 := planet.Satellites[0]
|
|
|
|
exitingNode.GracefulExit.Chore.Loop.Pause()
|
|
|
|
|
2020-01-23 17:47:20 +00:00
|
|
|
_, piecesContentSize, err := exitingNode.Storage2.BlobsCache.SpaceUsedBySatellite(ctx, satellite1.ID())
|
2019-10-30 14:46:56 +00:00
|
|
|
require.NoError(t, err)
|
2020-01-23 17:47:20 +00:00
|
|
|
require.NotZero(t, piecesContentSize)
|
2019-10-30 14:46:56 +00:00
|
|
|
|
2019-10-22 21:42:21 +01:00
|
|
|
exitStatus := overlay.ExitStatusRequest{
|
|
|
|
NodeID: exitingNode.ID(),
|
|
|
|
ExitInitiatedAt: time.Now(),
|
|
|
|
}
|
|
|
|
|
2019-10-30 14:46:56 +00:00
|
|
|
_, err = satellite1.Overlay.DB.UpdateExitStatus(ctx, &exitStatus)
|
2019-10-22 21:42:21 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
2020-01-23 17:47:20 +00:00
|
|
|
err = exitingNode.DB.Satellites().InitiateGracefulExit(ctx, satellite1.ID(), time.Now(), piecesContentSize)
|
2019-10-22 21:42:21 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// check that the storage node is exiting
|
|
|
|
exitProgress, err := exitingNode.DB.Satellites().ListGracefulExits(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, exitProgress, 1)
|
|
|
|
|
|
|
|
// initiate graceful exit on satellite side by running the SN chore.
|
|
|
|
exitingNode.GracefulExit.Chore.Loop.TriggerWait()
|
|
|
|
|
2023-04-24 10:10:00 +01:00
|
|
|
// run the satellite ranged loop to build the transfer queue.
|
|
|
|
_, err = satellite1.RangedLoop.RangedLoop.Service.RunOnce(ctx)
|
|
|
|
require.NoError(t, err)
|
2019-10-22 21:42:21 +01:00
|
|
|
|
|
|
|
// check that the satellite knows the storage node is exiting.
|
|
|
|
exitingNodes, err := satellite1.DB.OverlayCache().GetExitingNodes(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, exitingNodes, 1)
|
2019-10-24 17:24:42 +01:00
|
|
|
require.Equal(t, exitingNode.ID(), exitingNodes[0].NodeID)
|
2019-10-22 21:42:21 +01:00
|
|
|
|
2021-09-05 22:29:22 +01:00
|
|
|
queueItems, err := satellite1.DB.GracefulExit().GetIncomplete(ctx, exitStatus.NodeID, 10, 0)
|
2019-10-22 21:42:21 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, queueItems, 1)
|
|
|
|
|
|
|
|
// run the SN chore again to start processing transfers.
|
|
|
|
exitingNode.GracefulExit.Chore.Loop.TriggerWait()
|
2020-08-23 16:10:14 +01:00
|
|
|
// wait for workers to finish
|
2022-08-01 13:00:23 +01:00
|
|
|
err = exitingNode.GracefulExit.Chore.TestWaitForNoWorkers(ctx)
|
|
|
|
require.NoError(t, err)
|
2019-10-22 21:42:21 +01:00
|
|
|
|
|
|
|
// check that there are no more items to process
|
2021-09-05 22:29:22 +01:00
|
|
|
queueItems, err = satellite1.DB.GracefulExit().GetIncomplete(ctx, exitStatus.NodeID, 10, 0)
|
2019-10-22 21:42:21 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, queueItems, 0)
|
|
|
|
|
|
|
|
exitProgress, err = exitingNode.DB.Satellites().ListGracefulExits(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
for _, progress := range exitProgress {
|
|
|
|
if progress.SatelliteID == satellite1.ID() {
|
2019-10-30 14:46:56 +00:00
|
|
|
require.NotNil(t, progress.CompletionReceipt)
|
2019-10-22 21:42:21 +01:00
|
|
|
require.NotNil(t, progress.FinishedAt)
|
2019-10-30 14:46:56 +00:00
|
|
|
require.EqualValues(t, progress.StartingDiskUsage, progress.BytesDeleted)
|
2019-10-22 21:42:21 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// make sure there are no more pieces on the node.
|
|
|
|
namespaces, err := exitingNode.DB.Pieces().ListNamespaces(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
for _, ns := range namespaces {
|
2023-04-05 18:03:06 +01:00
|
|
|
err = exitingNode.DB.Pieces().WalkNamespace(ctx, ns, func(blobInfo blobstore.BlobInfo) error {
|
2019-10-22 21:42:21 +01:00
|
|
|
return errs.New("found a piece on the node. this shouldn't happen.")
|
|
|
|
})
|
|
|
|
require.NoError(t, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// getNodePieceCounts tallies all the pieces per node.
|
|
|
|
func getNodePieceCounts(ctx context.Context, planet *testplanet.Planet) (_ map[storj.NodeID]int, err error) {
|
|
|
|
nodePieceCounts := make(map[storj.NodeID]int)
|
|
|
|
for _, n := range planet.StorageNodes {
|
|
|
|
node := n
|
|
|
|
namespaces, err := node.DB.Pieces().ListNamespaces(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
for _, ns := range namespaces {
|
2023-04-05 18:03:06 +01:00
|
|
|
err = node.DB.Pieces().WalkNamespace(ctx, ns, func(blobInfo blobstore.BlobInfo) error {
|
2019-10-22 21:42:21 +01:00
|
|
|
nodePieceCounts[node.ID()]++
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nodePieceCounts, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// findNodeToExit selects the node storing the most pieces as the node to graceful exit.
|
2020-12-16 16:47:31 +00:00
|
|
|
func findNodeToExit(ctx context.Context, planet *testplanet.Planet) (*testplanet.StorageNode, error) {
|
2019-10-22 21:42:21 +01:00
|
|
|
satellite := planet.Satellites[0]
|
2020-12-16 16:47:31 +00:00
|
|
|
|
2021-09-07 09:15:47 +01:00
|
|
|
objects, err := satellite.Metabase.DB.TestingAllSegments(ctx)
|
2019-10-22 21:42:21 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
pieceCountMap := make(map[storj.NodeID]int, len(planet.StorageNodes))
|
|
|
|
for _, sn := range planet.StorageNodes {
|
|
|
|
pieceCountMap[sn.ID()] = 0
|
|
|
|
}
|
|
|
|
|
2020-12-16 16:47:31 +00:00
|
|
|
for _, object := range objects {
|
|
|
|
for _, piece := range object.Pieces {
|
|
|
|
pieceCountMap[piece.StorageNode]++
|
2019-10-22 21:42:21 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
var exitingNodeID storj.NodeID
|
|
|
|
maxCount := 0
|
|
|
|
for k, v := range pieceCountMap {
|
|
|
|
if exitingNodeID.IsZero() {
|
|
|
|
exitingNodeID = k
|
|
|
|
maxCount = v
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if v > maxCount {
|
|
|
|
exitingNodeID = k
|
|
|
|
maxCount = v
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-27 14:46:40 +00:00
|
|
|
return planet.FindNode(exitingNodeID), nil
|
2019-10-22 21:42:21 +01:00
|
|
|
}
|