diff --git a/satellite/repair/repair_test.go b/satellite/repair/repair_test.go
index e184ebcab..5f3aeacc5 100644
--- a/satellite/repair/repair_test.go
+++ b/satellite/repair/repair_test.go
@@ -1064,7 +1064,7 @@ func TestMissingPieceDataRepair(t *testing.T) {
 }
 
 // TestCorruptDataRepair_Succeed does the following:
-// - Uploads test data
+// - Uploads test data using different hash algorithms (BLAKE3 and SHA256)
 // - Kills some nodes carrying the uploaded segment but keep it above minimum requirement
 // - On one of the remaining nodes, corrupt the piece data being stored by that node
 // - Triggers data repair, which attempts to repair the data from the remaining nodes to
@@ -1074,109 +1074,126 @@ func TestMissingPieceDataRepair(t *testing.T) {
 func TestCorruptDataRepair_Succeed(t *testing.T) {
 	const RepairMaxExcessRateOptimalThreshold = 0.05
 
-	testplanet.Run(t, testplanet.Config{
-		SatelliteCount:   1,
-		StorageNodeCount: 15,
-		UplinkCount:      1,
-		Reconfigure: testplanet.Reconfigure{
-			Satellite: testplanet.Combine(
-				func(log *zap.Logger, index int, config *satellite.Config) {
-					config.Repairer.MaxExcessRateOptimalThreshold = RepairMaxExcessRateOptimalThreshold
-					config.Repairer.InMemoryRepair = true
-					config.Repairer.ReputationUpdateEnabled = true
-					config.Reputation.InitialAlpha = 1
-					config.Reputation.AuditLambda = 0.95
-				},
-				testplanet.ReconfigureRS(3, 4, 9, 9),
-			),
+	for _, tt := range []struct {
+		name     string
+		hashAlgo pb.PieceHashAlgorithm
+	}{
+		{
+			name:     "BLAKE3",
+			hashAlgo: pb.PieceHashAlgorithm_BLAKE3,
 		},
-	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
-		uplinkPeer := planet.Uplinks[0]
-		satellite := planet.Satellites[0]
-		// stop audit to prevent possible interactions i.e. repair timeout problems
-		satellite.Audit.Worker.Loop.Pause()
+		{
+			name:     "SHA256",
+			hashAlgo: pb.PieceHashAlgorithm_SHA256,
+		},
+	} {
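+		// capture the range variable so the subtest closure below uses this iteration's case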
+		tt := tt
+		t.Run(tt.name, func(t *testing.T) {
+			testplanet.Run(t, testplanet.Config{
+				SatelliteCount:   1,
+				StorageNodeCount: 15,
+				UplinkCount:      1,
+				Reconfigure: testplanet.Reconfigure{
+					Satellite: testplanet.Combine(
+						func(log *zap.Logger, index int, config *satellite.Config) {
+							config.Repairer.MaxExcessRateOptimalThreshold = RepairMaxExcessRateOptimalThreshold
+							config.Repairer.InMemoryRepair = true
+							config.Repairer.ReputationUpdateEnabled = true
+							config.Reputation.InitialAlpha = 1
+							config.Reputation.AuditLambda = 0.95
+						},
+						testplanet.ReconfigureRS(3, 4, 9, 9),
+					),
+				},
+			}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
+				uplinkPeer := planet.Uplinks[0]
+				satellite := planet.Satellites[0]
+				// stop audit to prevent possible interactions i.e. repair timeout problems
+				satellite.Audit.Worker.Loop.Pause()
 
-		satellite.RangedLoop.RangedLoop.Service.Loop.Stop()
-		satellite.Repair.Repairer.Loop.Pause()
+				satellite.RangedLoop.RangedLoop.Service.Loop.Stop()
+				satellite.Repair.Repairer.Loop.Pause()
 
-		var testData = testrand.Bytes(8 * memory.KiB)
-		// first, upload some remote data
-		err := uplinkPeer.Upload(ctx, satellite, "testbucket", "test/path", testData)
-		require.NoError(t, err)
+				var testData = testrand.Bytes(8 * memory.KiB)
+				// first, upload some remote data
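+				// wrap the context so the piecestore client hashes pieces with this case's algorithm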
+				err := uplinkPeer.Upload(piecestore.WithPieceHashAlgo(ctx, tt.hashAlgo), satellite, "testbucket", "test/path", testData)
+				require.NoError(t, err)
 
-		segment, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
-		require.Equal(t, 9, len(segment.Pieces))
-		require.Equal(t, 3, int(segment.Redundancy.RequiredShares))
-		toKill := 5
+				segment, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
+				require.Equal(t, 9, len(segment.Pieces))
+				require.Equal(t, 3, int(segment.Redundancy.RequiredShares))
+				toKill := 5
 
-		// kill nodes and track lost pieces
-		var availablePieces metabase.Pieces
+				// kill nodes and track lost pieces
+				var availablePieces metabase.Pieces
 
-		for i, piece := range segment.Pieces {
-			if i >= toKill {
-				availablePieces = append(availablePieces, piece)
-				continue
-			}
+				for i, piece := range segment.Pieces {
+					if i >= toKill {
+						availablePieces = append(availablePieces, piece)
+						continue
+					}
 
-			err := planet.StopNodeAndUpdate(ctx, planet.FindNode(piece.StorageNode))
-			require.NoError(t, err)
-		}
-		require.Equal(t, 4, len(availablePieces))
+					err := planet.StopNodeAndUpdate(ctx, planet.FindNode(piece.StorageNode))
+					require.NoError(t, err)
+				}
+				require.Equal(t, 4, len(availablePieces))
 
-		// choose first piece for corruption, for it to always be in the first limiter batch
-		corruptedPiece := availablePieces[0]
+				// choose first piece for corruption, for it to always be in the first limiter batch
+				corruptedPiece := availablePieces[0]
 
-		// corrupt piece data
-		corruptedNode := planet.FindNode(corruptedPiece.StorageNode)
-		require.NotNil(t, corruptedNode)
-		corruptedPieceID := segment.RootPieceID.Derive(corruptedPiece.StorageNode, int32(corruptedPiece.Number))
-		corruptPieceData(ctx, t, planet, corruptedNode, corruptedPieceID)
+				// corrupt piece data
+				corruptedNode := planet.FindNode(corruptedPiece.StorageNode)
+				require.NotNil(t, corruptedNode)
+				corruptedPieceID := segment.RootPieceID.Derive(corruptedPiece.StorageNode, int32(corruptedPiece.Number))
+				corruptPieceData(ctx, t, planet, corruptedNode, corruptedPieceID)
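+				// the repairer should detect this corruption via piece hash verification, whichever hash algorithm was used at upload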
 
-		reputationService := satellite.Repairer.Reputation
+				reputationService := satellite.Repairer.Reputation
 
-		nodesReputation := make(map[storj.NodeID]reputation.Info)
-		for _, piece := range availablePieces {
-			info, err := reputationService.Get(ctx, piece.StorageNode)
-			require.NoError(t, err)
-			nodesReputation[piece.StorageNode] = *info
-		}
+				nodesReputation := make(map[storj.NodeID]reputation.Info)
+				for _, piece := range availablePieces {
+					info, err := reputationService.Get(ctx, piece.StorageNode)
+					require.NoError(t, err)
+					nodesReputation[piece.StorageNode] = *info
+				}
 
-		satellite.Repair.Repairer.TestingSetMinFailures(1) // expect one node with bad data
-		// trigger checker with ranged loop to add segment to repair queue
-		_, err = satellite.RangedLoop.RangedLoop.Service.RunOnce(ctx)
-		require.NoError(t, err)
-		satellite.Repair.Repairer.Loop.Restart()
-		satellite.Repair.Repairer.Loop.TriggerWait()
-		satellite.Repair.Repairer.Loop.Pause()
-		satellite.Repair.Repairer.WaitForPendingRepairs()
+				satellite.Repair.Repairer.TestingSetMinFailures(1) // expect one node with bad data
+				// trigger checker with ranged loop to add segment to repair queue
+				_, err = satellite.RangedLoop.RangedLoop.Service.RunOnce(ctx)
+				require.NoError(t, err)
+				satellite.Repair.Repairer.Loop.Restart()
+				satellite.Repair.Repairer.Loop.TriggerWait()
+				satellite.Repair.Repairer.Loop.Pause()
+				satellite.Repair.Repairer.WaitForPendingRepairs()
 
-		nodesReputationAfter := make(map[storj.NodeID]reputation.Info)
-		for _, piece := range availablePieces {
-			info, err := reputationService.Get(ctx, piece.StorageNode)
-			require.NoError(t, err)
-			nodesReputationAfter[piece.StorageNode] = *info
-		}
+				nodesReputationAfter := make(map[storj.NodeID]reputation.Info)
+				for _, piece := range availablePieces {
+					info, err := reputationService.Get(ctx, piece.StorageNode)
+					require.NoError(t, err)
+					nodesReputationAfter[piece.StorageNode] = *info
+				}
 
-		// repair should update audit status
-		for _, piece := range availablePieces[1:] {
-			successfulNodeReputation := nodesReputation[piece.StorageNode]
-			successfulNodeReputationAfter := nodesReputationAfter[piece.StorageNode]
-			require.Equal(t, successfulNodeReputation.TotalAuditCount+1, successfulNodeReputationAfter.TotalAuditCount)
-			require.Equal(t, successfulNodeReputation.AuditSuccessCount+1, successfulNodeReputationAfter.AuditSuccessCount)
-			require.GreaterOrEqual(t, reputationRatio(successfulNodeReputationAfter), reputationRatio(successfulNodeReputation))
-		}
+				// repair should update audit status
+				for _, piece := range availablePieces[1:] {
+					successfulNodeReputation := nodesReputation[piece.StorageNode]
+					successfulNodeReputationAfter := nodesReputationAfter[piece.StorageNode]
+					require.Equal(t, successfulNodeReputation.TotalAuditCount+1, successfulNodeReputationAfter.TotalAuditCount)
+					require.Equal(t, successfulNodeReputation.AuditSuccessCount+1, successfulNodeReputationAfter.AuditSuccessCount)
+					require.GreaterOrEqual(t, reputationRatio(successfulNodeReputationAfter), reputationRatio(successfulNodeReputation))
+				}
 
-		corruptedNodeReputation := nodesReputation[corruptedPiece.StorageNode]
-		corruptedNodeReputationAfter := nodesReputationAfter[corruptedPiece.StorageNode]
-		require.Equal(t, corruptedNodeReputation.TotalAuditCount+1, corruptedNodeReputationAfter.TotalAuditCount)
-		require.Less(t, reputationRatio(corruptedNodeReputationAfter), reputationRatio(corruptedNodeReputation))
+				corruptedNodeReputation := nodesReputation[corruptedPiece.StorageNode]
+				corruptedNodeReputationAfter := nodesReputationAfter[corruptedPiece.StorageNode]
+				require.Equal(t, corruptedNodeReputation.TotalAuditCount+1, corruptedNodeReputationAfter.TotalAuditCount)
+				require.Less(t, reputationRatio(corruptedNodeReputationAfter), reputationRatio(corruptedNodeReputation))
 
-		// repair succeeded, so segment should not contain corrupted piece
-		segmentAfter, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
-		for _, piece := range segmentAfter.Pieces {
-			require.NotEqual(t, piece.Number, corruptedPiece.Number, "there should be no corrupted piece in pointer")
-		}
-	})
+				// repair succeeded, so segment should not contain corrupted piece
+				segmentAfter, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
+				for _, piece := range segmentAfter.Pieces {
+					require.NotEqual(t, piece.Number, corruptedPiece.Number, "there should be no corrupted piece in pointer")
+				}
+			})
+		})
+	}
 }
 
 // TestCorruptDataRepair_Failed does the following: