2023-05-26 13:22:15 +01:00
|
|
|
// Copyright (C) 2019 Storj Labs, Inc.
|
|
|
|
// See LICENSE for copying information.
|
|
|
|
|
|
|
|
package repairer_test
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2023-06-26 10:54:52 +01:00
|
|
|
"strconv"
|
2023-05-26 13:22:15 +01:00
|
|
|
"testing"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"go.uber.org/zap"
|
|
|
|
|
|
|
|
"storj.io/common/memory"
|
|
|
|
"storj.io/common/pb"
|
|
|
|
"storj.io/common/storj"
|
2023-06-26 10:54:52 +01:00
|
|
|
"storj.io/common/storj/location"
|
2023-05-26 13:22:15 +01:00
|
|
|
"storj.io/common/testcontext"
|
|
|
|
"storj.io/common/testrand"
|
|
|
|
"storj.io/storj/private/testplanet"
|
|
|
|
"storj.io/storj/satellite"
|
|
|
|
"storj.io/storj/satellite/buckets"
|
|
|
|
"storj.io/storj/satellite/metabase"
|
|
|
|
"storj.io/storj/satellite/overlay"
|
|
|
|
"storj.io/storj/satellite/repair/queue"
|
|
|
|
)
|
|
|
|
|
|
|
|
func TestSegmentRepairPlacement(t *testing.T) {
|
|
|
|
piecesCount := 4
|
|
|
|
testplanet.Run(t, testplanet.Config{
|
2023-06-26 10:54:52 +01:00
|
|
|
SatelliteCount: 1, StorageNodeCount: 12, UplinkCount: 1,
|
2023-05-26 13:22:15 +01:00
|
|
|
Reconfigure: testplanet.Reconfigure{
|
2023-07-17 15:23:20 +01:00
|
|
|
Satellite: testplanet.Combine(
|
|
|
|
testplanet.ReconfigureRS(1, 1, piecesCount, piecesCount),
|
|
|
|
func(log *zap.Logger, index int, config *satellite.Config) {
|
|
|
|
config.Repairer.DoDeclumping = false
|
|
|
|
},
|
|
|
|
),
|
2023-05-26 13:22:15 +01:00
|
|
|
},
|
|
|
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
|
|
|
require.NoError(t, planet.Uplinks[0].CreateBucket(ctx, planet.Satellites[0], "testbucket"))
|
|
|
|
|
2023-06-26 10:54:52 +01:00
|
|
|
defaultLocation := location.Poland
|
|
|
|
|
2023-05-26 13:22:15 +01:00
|
|
|
_, err := planet.Satellites[0].API.Buckets.Service.UpdateBucket(ctx, buckets.Bucket{
|
|
|
|
ProjectID: planet.Uplinks[0].Projects[0].ID,
|
|
|
|
Name: "testbucket",
|
|
|
|
Placement: storj.EU,
|
|
|
|
})
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
type testCase struct {
|
|
|
|
piecesOutOfPlacement int
|
|
|
|
piecesAfterRepair int
|
2023-06-26 10:54:52 +01:00
|
|
|
|
|
|
|
// how many from out of placement pieces should be also offline
|
|
|
|
piecesOutOfPlacementOffline int
|
2023-05-26 13:22:15 +01:00
|
|
|
}
|
|
|
|
|
2023-06-26 10:54:52 +01:00
|
|
|
for i, tc := range []testCase{
|
2023-05-26 13:22:15 +01:00
|
|
|
// all pieces/nodes are out of placement, repair download/upload should be triggered
|
|
|
|
{piecesOutOfPlacement: piecesCount, piecesAfterRepair: piecesCount},
|
2023-06-26 10:54:52 +01:00
|
|
|
|
|
|
|
// all pieces/nodes are out of placement, repair download/upload should be triggered, some pieces are offline
|
|
|
|
{piecesOutOfPlacement: piecesCount, piecesAfterRepair: piecesCount, piecesOutOfPlacementOffline: 1},
|
|
|
|
{piecesOutOfPlacement: piecesCount, piecesAfterRepair: piecesCount, piecesOutOfPlacementOffline: 2},
|
|
|
|
|
2023-05-26 13:22:15 +01:00
|
|
|
// few pieces/nodes are out of placement, repair download/upload should be triggered
|
2023-06-26 10:54:52 +01:00
|
|
|
{piecesOutOfPlacement: piecesCount - 1, piecesAfterRepair: piecesCount},
|
|
|
|
{piecesOutOfPlacement: piecesCount - 1, piecesAfterRepair: piecesCount, piecesOutOfPlacementOffline: 1},
|
|
|
|
|
2023-05-26 13:22:15 +01:00
|
|
|
// single piece/node is out of placement, NO download/upload repair, we are only removing piece from segment
|
|
|
|
// as segment is still above repair threshold
|
|
|
|
{piecesOutOfPlacement: 1, piecesAfterRepair: piecesCount - 1},
|
2023-06-26 10:54:52 +01:00
|
|
|
{piecesOutOfPlacement: 1, piecesAfterRepair: piecesCount - 1, piecesOutOfPlacementOffline: 1},
|
|
|
|
{piecesOutOfPlacement: 1, piecesAfterRepair: piecesCount - 1, piecesOutOfPlacementOffline: 1},
|
2023-05-26 13:22:15 +01:00
|
|
|
} {
|
2023-06-26 10:54:52 +01:00
|
|
|
t.Run("#"+strconv.Itoa(i), func(t *testing.T) {
|
|
|
|
for _, node := range planet.StorageNodes {
|
|
|
|
require.NoError(t, planet.Satellites[0].Overlay.Service.TestNodeCountryCode(ctx, node.ID(), defaultLocation.String()))
|
|
|
|
}
|
|
|
|
|
|
|
|
require.NoError(t, planet.Satellites[0].Repairer.Overlay.DownloadSelectionCache.Refresh(ctx))
|
|
|
|
|
|
|
|
expectedData := testrand.Bytes(5 * memory.KiB)
|
|
|
|
err = planet.Uplinks[0].Upload(ctx, planet.Satellites[0], "testbucket", "object", expectedData)
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
segments, err := planet.Satellites[0].Metabase.DB.TestingAllSegments(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, segments, 1)
|
|
|
|
require.Len(t, segments[0].Pieces, piecesCount)
|
|
|
|
|
|
|
|
for index, piece := range segments[0].Pieces {
|
|
|
|
// make node offline if needed
|
|
|
|
require.NoError(t, updateNodeStatus(ctx, planet.Satellites[0], planet.FindNode(piece.StorageNode), index < tc.piecesOutOfPlacementOffline, defaultLocation))
|
|
|
|
|
|
|
|
if index < tc.piecesOutOfPlacement {
|
|
|
|
require.NoError(t, planet.Satellites[0].Overlay.Service.TestNodeCountryCode(ctx, piece.StorageNode, "US"))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// confirm that some pieces are out of placement
|
|
|
|
ok, err := allPiecesInPlacement(ctx, planet.Satellites[0].Overlay.Service, segments[0].Pieces, segments[0].Placement)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.False(t, ok)
|
|
|
|
|
|
|
|
require.NoError(t, planet.Satellites[0].Repairer.Overlay.DownloadSelectionCache.Refresh(ctx))
|
|
|
|
|
|
|
|
_, err = planet.Satellites[0].Repairer.SegmentRepairer.Repair(ctx, &queue.InjuredSegment{
|
|
|
|
StreamID: segments[0].StreamID,
|
|
|
|
Position: segments[0].Position,
|
|
|
|
})
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// confirm that all pieces have correct placement
|
|
|
|
segments, err = planet.Satellites[0].Metabase.DB.TestingAllSegments(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, segments, 1)
|
|
|
|
require.NotNil(t, segments[0].RepairedAt)
|
|
|
|
require.Len(t, segments[0].Pieces, tc.piecesAfterRepair)
|
|
|
|
|
|
|
|
ok, err = allPiecesInPlacement(ctx, planet.Satellites[0].Overlay.Service, segments[0].Pieces, segments[0].Placement)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.True(t, ok)
|
|
|
|
|
2023-07-05 12:10:17 +01:00
|
|
|
require.NoError(t, planet.Satellites[0].API.Overlay.Service.DownloadSelectionCache.Refresh(ctx))
|
|
|
|
|
2023-06-26 10:54:52 +01:00
|
|
|
data, err := planet.Uplinks[0].Download(ctx, planet.Satellites[0], "testbucket", "object")
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, expectedData, data)
|
2023-05-26 13:22:15 +01:00
|
|
|
})
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestSegmentRepairPlacementAndClumped(t *testing.T) {
|
|
|
|
testplanet.Run(t, testplanet.Config{
|
|
|
|
SatelliteCount: 1, StorageNodeCount: 8, UplinkCount: 1,
|
|
|
|
Reconfigure: testplanet.Reconfigure{
|
|
|
|
Satellite: testplanet.Combine(
|
|
|
|
testplanet.ReconfigureRS(1, 2, 4, 4),
|
|
|
|
func(log *zap.Logger, index int, config *satellite.Config) {
|
|
|
|
config.Checker.DoDeclumping = true
|
|
|
|
config.Repairer.DoDeclumping = true
|
|
|
|
},
|
|
|
|
),
|
|
|
|
},
|
|
|
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
|
|
|
require.NoError(t, planet.Uplinks[0].CreateBucket(ctx, planet.Satellites[0], "testbucket"))
|
|
|
|
|
|
|
|
_, err := planet.Satellites[0].API.Buckets.Service.UpdateBucket(ctx, buckets.Bucket{
|
|
|
|
ProjectID: planet.Uplinks[0].Projects[0].ID,
|
|
|
|
Name: "testbucket",
|
|
|
|
Placement: storj.EU,
|
|
|
|
})
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
for _, node := range planet.StorageNodes {
|
|
|
|
require.NoError(t, planet.Satellites[0].Overlay.Service.TestNodeCountryCode(ctx, node.ID(), "PL"))
|
|
|
|
}
|
|
|
|
|
|
|
|
err = planet.Uplinks[0].Upload(ctx, planet.Satellites[0], "testbucket", "object", testrand.Bytes(5*memory.KiB))
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
for _, node := range planet.StorageNodes {
|
|
|
|
require.NoError(t, planet.Satellites[0].Overlay.Service.TestNodeCountryCode(ctx, node.ID(), "PL"))
|
|
|
|
}
|
|
|
|
|
|
|
|
require.NoError(t, planet.Satellites[0].Repairer.Overlay.DownloadSelectionCache.Refresh(ctx))
|
|
|
|
|
|
|
|
segments, err := planet.Satellites[0].Metabase.DB.TestingAllSegments(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, segments, 1)
|
|
|
|
require.Len(t, segments[0].Pieces, 4)
|
|
|
|
|
|
|
|
// set nodes to the same placement/country and put all nodes into the same net to mark them as clumped
|
|
|
|
node0 := planet.FindNode(segments[0].Pieces[0].StorageNode)
|
|
|
|
for _, piece := range segments[0].Pieces {
|
|
|
|
require.NoError(t, planet.Satellites[0].Overlay.Service.TestNodeCountryCode(ctx, piece.StorageNode, "US"))
|
|
|
|
|
|
|
|
local := node0.Contact.Service.Local()
|
|
|
|
checkInInfo := overlay.NodeCheckInInfo{
|
|
|
|
NodeID: piece.StorageNode,
|
|
|
|
Address: &pb.NodeAddress{Address: local.Address},
|
|
|
|
LastIPPort: local.Address,
|
|
|
|
LastNet: node0.Contact.Service.Local().Address,
|
|
|
|
IsUp: true,
|
|
|
|
Operator: &local.Operator,
|
|
|
|
Capacity: &local.Capacity,
|
|
|
|
Version: &local.Version,
|
|
|
|
}
|
|
|
|
err = planet.Satellites[0].DB.OverlayCache().UpdateCheckIn(ctx, checkInInfo, time.Now().UTC(), overlay.NodeSelectionConfig{})
|
|
|
|
require.NoError(t, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// confirm that some pieces are out of placement
|
|
|
|
ok, err := allPiecesInPlacement(ctx, planet.Satellites[0].Overlay.Service, segments[0].Pieces, segments[0].Placement)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.False(t, ok)
|
|
|
|
|
|
|
|
require.NoError(t, planet.Satellites[0].Repairer.Overlay.DownloadSelectionCache.Refresh(ctx))
|
|
|
|
|
|
|
|
_, err = planet.Satellites[0].Repairer.SegmentRepairer.Repair(ctx, &queue.InjuredSegment{
|
|
|
|
StreamID: segments[0].StreamID,
|
|
|
|
Position: segments[0].Position,
|
|
|
|
})
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// confirm that all pieces have correct placement
|
|
|
|
segments, err = planet.Satellites[0].Metabase.DB.TestingAllSegments(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, segments, 1)
|
|
|
|
require.NotNil(t, segments[0].RepairedAt)
|
|
|
|
require.Len(t, segments[0].Pieces, 4)
|
|
|
|
|
|
|
|
ok, err = allPiecesInPlacement(ctx, planet.Satellites[0].Overlay.Service, segments[0].Pieces, segments[0].Placement)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.True(t, ok)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2023-06-12 09:30:35 +01:00
|
|
|
func TestSegmentRepairPlacementNotEnoughNodes(t *testing.T) {
|
|
|
|
testplanet.Run(t, testplanet.Config{
|
2023-06-30 10:02:01 +01:00
|
|
|
SatelliteCount: 1, StorageNodeCount: 4, UplinkCount: 1,
|
2023-06-12 09:30:35 +01:00
|
|
|
Reconfigure: testplanet.Reconfigure{
|
|
|
|
Satellite: testplanet.ReconfigureRS(1, 2, 4, 4),
|
|
|
|
},
|
|
|
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
|
|
|
require.NoError(t, planet.Uplinks[0].CreateBucket(ctx, planet.Satellites[0], "testbucket"))
|
|
|
|
|
|
|
|
_, err := planet.Satellites[0].API.Buckets.Service.UpdateBucket(ctx, buckets.Bucket{
|
|
|
|
ProjectID: planet.Uplinks[0].Projects[0].ID,
|
|
|
|
Name: "testbucket",
|
|
|
|
Placement: storj.EU,
|
|
|
|
})
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
for _, node := range planet.StorageNodes {
|
|
|
|
require.NoError(t, planet.Satellites[0].Overlay.Service.TestNodeCountryCode(ctx, node.ID(), "PL"))
|
|
|
|
}
|
|
|
|
|
|
|
|
err = planet.Uplinks[0].Upload(ctx, planet.Satellites[0], "testbucket", "object", testrand.Bytes(5*memory.KiB))
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// change all nodes location to US
|
|
|
|
for _, node := range planet.StorageNodes {
|
|
|
|
require.NoError(t, planet.Satellites[0].Overlay.Service.TestNodeCountryCode(ctx, node.ID(), "US"))
|
|
|
|
}
|
|
|
|
|
|
|
|
require.NoError(t, planet.Satellites[0].Repairer.Overlay.DownloadSelectionCache.Refresh(ctx))
|
|
|
|
|
|
|
|
segments, err := planet.Satellites[0].Metabase.DB.TestingAllSegments(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, segments, 1)
|
|
|
|
require.Len(t, segments[0].Pieces, 4)
|
|
|
|
|
|
|
|
// we have bucket geofenced to EU but now all nodes are in US, repairing should fail because
|
|
|
|
// not enough nodes are available but segment shouldn't be deleted from repair queue
|
|
|
|
shouldDelete, err := planet.Satellites[0].Repairer.SegmentRepairer.Repair(ctx, &queue.InjuredSegment{
|
|
|
|
StreamID: segments[0].StreamID,
|
|
|
|
Position: segments[0].Position,
|
|
|
|
})
|
2023-06-30 10:02:01 +01:00
|
|
|
require.True(t, overlay.ErrNotEnoughNodes.Has(err))
|
2023-06-12 09:30:35 +01:00
|
|
|
require.False(t, shouldDelete)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2023-06-30 10:02:01 +01:00
|
|
|
func TestSegmentRepairPlacementAndExcludedCountries(t *testing.T) {
|
|
|
|
testplanet.Run(t, testplanet.Config{
|
|
|
|
SatelliteCount: 1, StorageNodeCount: 4, UplinkCount: 1,
|
|
|
|
Reconfigure: testplanet.Reconfigure{
|
|
|
|
Satellite: testplanet.Combine(
|
|
|
|
testplanet.ReconfigureRS(1, 2, 4, 4),
|
|
|
|
func(log *zap.Logger, index int, config *satellite.Config) {
|
|
|
|
config.Overlay.RepairExcludedCountryCodes = []string{"US"}
|
|
|
|
},
|
|
|
|
),
|
|
|
|
},
|
|
|
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
|
|
|
require.NoError(t, planet.Uplinks[0].CreateBucket(ctx, planet.Satellites[0], "testbucket"))
|
|
|
|
|
|
|
|
_, err := planet.Satellites[0].API.Buckets.Service.UpdateBucket(ctx, buckets.Bucket{
|
|
|
|
ProjectID: planet.Uplinks[0].Projects[0].ID,
|
|
|
|
Name: "testbucket",
|
|
|
|
Placement: storj.EU,
|
|
|
|
})
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
for _, node := range planet.StorageNodes {
|
|
|
|
require.NoError(t, planet.Satellites[0].Overlay.Service.TestNodeCountryCode(ctx, node.ID(), "PL"))
|
|
|
|
}
|
|
|
|
|
|
|
|
err = planet.Uplinks[0].Upload(ctx, planet.Satellites[0], "testbucket", "object", testrand.Bytes(5*memory.KiB))
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
segments, err := planet.Satellites[0].Metabase.DB.TestingAllSegments(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, segments, 1)
|
|
|
|
require.Len(t, segments[0].Pieces, 4)
|
|
|
|
|
|
|
|
// change single node to location outside bucket placement and location which is part of RepairExcludedCountryCodes
|
|
|
|
require.NoError(t, planet.Satellites[0].Overlay.Service.TestNodeCountryCode(ctx, segments[0].Pieces[0].StorageNode, "US"))
|
|
|
|
|
|
|
|
require.NoError(t, planet.Satellites[0].Repairer.Overlay.DownloadSelectionCache.Refresh(ctx))
|
|
|
|
|
|
|
|
shouldDelete, err := planet.Satellites[0].Repairer.SegmentRepairer.Repair(ctx, &queue.InjuredSegment{
|
|
|
|
StreamID: segments[0].StreamID,
|
|
|
|
Position: segments[0].Position,
|
|
|
|
})
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.True(t, shouldDelete)
|
|
|
|
|
|
|
|
// we are checking that repairer counted only single piece as out of placement and didn't count this piece
|
|
|
|
// also as from excluded country. That would cause full repair because repairer would count single pieces
|
|
|
|
// as unhealthy two times. Full repair would restore number of pieces to 4 but we just removed single pieces.
|
|
|
|
segmentsAfter, err := planet.Satellites[0].Metabase.DB.TestingAllSegments(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.ElementsMatch(t, segments[0].Pieces[1:], segmentsAfter[0].Pieces)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2023-05-26 13:22:15 +01:00
|
|
|
func allPiecesInPlacement(ctx context.Context, overaly *overlay.Service, pieces metabase.Pieces, placement storj.PlacementConstraint) (bool, error) {
|
|
|
|
for _, piece := range pieces {
|
|
|
|
nodeDossier, err := overaly.Get(ctx, piece.StorageNode)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
if !placement.AllowedCountry(nodeDossier.CountryCode) {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}
|
2023-06-26 10:54:52 +01:00
|
|
|
|
|
|
|
func updateNodeStatus(ctx context.Context, satellite *testplanet.Satellite, node *testplanet.StorageNode, offline bool, countryCode location.CountryCode) error {
|
|
|
|
timestamp := time.Now()
|
|
|
|
if offline {
|
|
|
|
timestamp = time.Now().Add(-4 * time.Hour)
|
|
|
|
}
|
|
|
|
|
|
|
|
return satellite.DB.OverlayCache().UpdateCheckIn(ctx, overlay.NodeCheckInInfo{
|
|
|
|
NodeID: node.ID(),
|
|
|
|
Address: &pb.NodeAddress{Address: node.Addr()},
|
|
|
|
IsUp: true,
|
|
|
|
Version: &pb.NodeVersion{
|
|
|
|
Version: "v0.0.0",
|
|
|
|
CommitHash: "",
|
|
|
|
Timestamp: time.Time{},
|
|
|
|
Release: true,
|
|
|
|
},
|
|
|
|
Capacity: &pb.NodeCapacity{
|
|
|
|
FreeDisk: 1 * memory.GiB.Int64(),
|
|
|
|
},
|
|
|
|
CountryCode: countryCode,
|
|
|
|
}, timestamp, satellite.Config.Overlay.Node)
|
|
|
|
}
|