satellite/repair: update repair test to use blake3 algo
Update the repair test to also use the BLAKE3 hashing algorithm. See https://github.com/storj/storj/issues/5649. Change-Id: Id8299576f8be4cfd84ddf9a6b852e653628ada72
This commit is contained in:
parent
52ff7a66a0
commit
bc30deee11
@ -1064,7 +1064,7 @@ func TestMissingPieceDataRepair(t *testing.T) {
|
||||
}
|
||||
|
||||
// TestCorruptDataRepair_Succeed does the following:
|
||||
// - Uploads test data
|
||||
// - Uploads test data using different hash algorithms (Blake3 and SHA256)
|
||||
// - Kills some nodes carrying the uploaded segment but keeps it above the minimum requirement
|
||||
// - On one of the remaining nodes, corrupt the piece data being stored by that node
|
||||
// - Triggers data repair, which attempts to repair the data from the remaining nodes to
|
||||
@ -1074,109 +1074,126 @@ func TestMissingPieceDataRepair(t *testing.T) {
|
||||
func TestCorruptDataRepair_Succeed(t *testing.T) {
|
||||
const RepairMaxExcessRateOptimalThreshold = 0.05
|
||||
|
||||
testplanet.Run(t, testplanet.Config{
|
||||
SatelliteCount: 1,
|
||||
StorageNodeCount: 15,
|
||||
UplinkCount: 1,
|
||||
Reconfigure: testplanet.Reconfigure{
|
||||
Satellite: testplanet.Combine(
|
||||
func(log *zap.Logger, index int, config *satellite.Config) {
|
||||
config.Repairer.MaxExcessRateOptimalThreshold = RepairMaxExcessRateOptimalThreshold
|
||||
config.Repairer.InMemoryRepair = true
|
||||
config.Repairer.ReputationUpdateEnabled = true
|
||||
config.Reputation.InitialAlpha = 1
|
||||
config.Reputation.AuditLambda = 0.95
|
||||
},
|
||||
testplanet.ReconfigureRS(3, 4, 9, 9),
|
||||
),
|
||||
for _, tt := range []struct {
|
||||
name string
|
||||
hashAlgo pb.PieceHashAlgorithm
|
||||
}{
|
||||
{
|
||||
name: "BLAKE3",
|
||||
hashAlgo: pb.PieceHashAlgorithm_BLAKE3,
|
||||
},
|
||||
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
||||
uplinkPeer := planet.Uplinks[0]
|
||||
satellite := planet.Satellites[0]
|
||||
// stop audit to prevent possible interactions i.e. repair timeout problems
|
||||
satellite.Audit.Worker.Loop.Pause()
|
||||
{
|
||||
name: "SHA256",
|
||||
hashAlgo: pb.PieceHashAlgorithm_SHA256,
|
||||
},
|
||||
} {
|
||||
tt := tt
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
testplanet.Run(t, testplanet.Config{
|
||||
SatelliteCount: 1,
|
||||
StorageNodeCount: 15,
|
||||
UplinkCount: 1,
|
||||
Reconfigure: testplanet.Reconfigure{
|
||||
Satellite: testplanet.Combine(
|
||||
func(log *zap.Logger, index int, config *satellite.Config) {
|
||||
config.Repairer.MaxExcessRateOptimalThreshold = RepairMaxExcessRateOptimalThreshold
|
||||
config.Repairer.InMemoryRepair = true
|
||||
config.Repairer.ReputationUpdateEnabled = true
|
||||
config.Reputation.InitialAlpha = 1
|
||||
config.Reputation.AuditLambda = 0.95
|
||||
},
|
||||
testplanet.ReconfigureRS(3, 4, 9, 9),
|
||||
),
|
||||
},
|
||||
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
||||
uplinkPeer := planet.Uplinks[0]
|
||||
satellite := planet.Satellites[0]
|
||||
// stop audit to prevent possible interactions i.e. repair timeout problems
|
||||
satellite.Audit.Worker.Loop.Pause()
|
||||
|
||||
satellite.RangedLoop.RangedLoop.Service.Loop.Stop()
|
||||
satellite.Repair.Repairer.Loop.Pause()
|
||||
satellite.RangedLoop.RangedLoop.Service.Loop.Stop()
|
||||
satellite.Repair.Repairer.Loop.Pause()
|
||||
|
||||
var testData = testrand.Bytes(8 * memory.KiB)
|
||||
// first, upload some remote data
|
||||
err := uplinkPeer.Upload(ctx, satellite, "testbucket", "test/path", testData)
|
||||
require.NoError(t, err)
|
||||
var testData = testrand.Bytes(8 * memory.KiB)
|
||||
// first, upload some remote data
|
||||
err := uplinkPeer.Upload(piecestore.WithPieceHashAlgo(ctx, tt.hashAlgo), satellite, "testbucket", "test/path", testData)
|
||||
require.NoError(t, err)
|
||||
|
||||
segment, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
|
||||
require.Equal(t, 9, len(segment.Pieces))
|
||||
require.Equal(t, 3, int(segment.Redundancy.RequiredShares))
|
||||
toKill := 5
|
||||
segment, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
|
||||
require.Equal(t, 9, len(segment.Pieces))
|
||||
require.Equal(t, 3, int(segment.Redundancy.RequiredShares))
|
||||
toKill := 5
|
||||
|
||||
// kill nodes and track lost pieces
|
||||
var availablePieces metabase.Pieces
|
||||
// kill nodes and track lost pieces
|
||||
var availablePieces metabase.Pieces
|
||||
|
||||
for i, piece := range segment.Pieces {
|
||||
if i >= toKill {
|
||||
availablePieces = append(availablePieces, piece)
|
||||
continue
|
||||
}
|
||||
for i, piece := range segment.Pieces {
|
||||
if i >= toKill {
|
||||
availablePieces = append(availablePieces, piece)
|
||||
continue
|
||||
}
|
||||
|
||||
err := planet.StopNodeAndUpdate(ctx, planet.FindNode(piece.StorageNode))
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.Equal(t, 4, len(availablePieces))
|
||||
err := planet.StopNodeAndUpdate(ctx, planet.FindNode(piece.StorageNode))
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.Equal(t, 4, len(availablePieces))
|
||||
|
||||
// choose first piece for corruption, for it to always be in the first limiter batch
|
||||
corruptedPiece := availablePieces[0]
|
||||
// choose first piece for corruption, for it to always be in the first limiter batch
|
||||
corruptedPiece := availablePieces[0]
|
||||
|
||||
// corrupt piece data
|
||||
corruptedNode := planet.FindNode(corruptedPiece.StorageNode)
|
||||
require.NotNil(t, corruptedNode)
|
||||
corruptedPieceID := segment.RootPieceID.Derive(corruptedPiece.StorageNode, int32(corruptedPiece.Number))
|
||||
corruptPieceData(ctx, t, planet, corruptedNode, corruptedPieceID)
|
||||
// corrupt piece data
|
||||
corruptedNode := planet.FindNode(corruptedPiece.StorageNode)
|
||||
require.NotNil(t, corruptedNode)
|
||||
corruptedPieceID := segment.RootPieceID.Derive(corruptedPiece.StorageNode, int32(corruptedPiece.Number))
|
||||
corruptPieceData(ctx, t, planet, corruptedNode, corruptedPieceID)
|
||||
|
||||
reputationService := satellite.Repairer.Reputation
|
||||
reputationService := satellite.Repairer.Reputation
|
||||
|
||||
nodesReputation := make(map[storj.NodeID]reputation.Info)
|
||||
for _, piece := range availablePieces {
|
||||
info, err := reputationService.Get(ctx, piece.StorageNode)
|
||||
require.NoError(t, err)
|
||||
nodesReputation[piece.StorageNode] = *info
|
||||
}
|
||||
nodesReputation := make(map[storj.NodeID]reputation.Info)
|
||||
for _, piece := range availablePieces {
|
||||
info, err := reputationService.Get(ctx, piece.StorageNode)
|
||||
require.NoError(t, err)
|
||||
nodesReputation[piece.StorageNode] = *info
|
||||
}
|
||||
|
||||
satellite.Repair.Repairer.TestingSetMinFailures(1) // expect one node with bad data
|
||||
// trigger checker with ranged loop to add segment to repair queue
|
||||
_, err = satellite.RangedLoop.RangedLoop.Service.RunOnce(ctx)
|
||||
require.NoError(t, err)
|
||||
satellite.Repair.Repairer.Loop.Restart()
|
||||
satellite.Repair.Repairer.Loop.TriggerWait()
|
||||
satellite.Repair.Repairer.Loop.Pause()
|
||||
satellite.Repair.Repairer.WaitForPendingRepairs()
|
||||
satellite.Repair.Repairer.TestingSetMinFailures(1) // expect one node with bad data
|
||||
// trigger checker with ranged loop to add segment to repair queue
|
||||
_, err = satellite.RangedLoop.RangedLoop.Service.RunOnce(ctx)
|
||||
require.NoError(t, err)
|
||||
satellite.Repair.Repairer.Loop.Restart()
|
||||
satellite.Repair.Repairer.Loop.TriggerWait()
|
||||
satellite.Repair.Repairer.Loop.Pause()
|
||||
satellite.Repair.Repairer.WaitForPendingRepairs()
|
||||
|
||||
nodesReputationAfter := make(map[storj.NodeID]reputation.Info)
|
||||
for _, piece := range availablePieces {
|
||||
info, err := reputationService.Get(ctx, piece.StorageNode)
|
||||
require.NoError(t, err)
|
||||
nodesReputationAfter[piece.StorageNode] = *info
|
||||
}
|
||||
nodesReputationAfter := make(map[storj.NodeID]reputation.Info)
|
||||
for _, piece := range availablePieces {
|
||||
info, err := reputationService.Get(ctx, piece.StorageNode)
|
||||
require.NoError(t, err)
|
||||
nodesReputationAfter[piece.StorageNode] = *info
|
||||
}
|
||||
|
||||
// repair should update audit status
|
||||
for _, piece := range availablePieces[1:] {
|
||||
successfulNodeReputation := nodesReputation[piece.StorageNode]
|
||||
successfulNodeReputationAfter := nodesReputationAfter[piece.StorageNode]
|
||||
require.Equal(t, successfulNodeReputation.TotalAuditCount+1, successfulNodeReputationAfter.TotalAuditCount)
|
||||
require.Equal(t, successfulNodeReputation.AuditSuccessCount+1, successfulNodeReputationAfter.AuditSuccessCount)
|
||||
require.GreaterOrEqual(t, reputationRatio(successfulNodeReputationAfter), reputationRatio(successfulNodeReputation))
|
||||
}
|
||||
// repair should update audit status
|
||||
for _, piece := range availablePieces[1:] {
|
||||
successfulNodeReputation := nodesReputation[piece.StorageNode]
|
||||
successfulNodeReputationAfter := nodesReputationAfter[piece.StorageNode]
|
||||
require.Equal(t, successfulNodeReputation.TotalAuditCount+1, successfulNodeReputationAfter.TotalAuditCount)
|
||||
require.Equal(t, successfulNodeReputation.AuditSuccessCount+1, successfulNodeReputationAfter.AuditSuccessCount)
|
||||
require.GreaterOrEqual(t, reputationRatio(successfulNodeReputationAfter), reputationRatio(successfulNodeReputation))
|
||||
}
|
||||
|
||||
corruptedNodeReputation := nodesReputation[corruptedPiece.StorageNode]
|
||||
corruptedNodeReputationAfter := nodesReputationAfter[corruptedPiece.StorageNode]
|
||||
require.Equal(t, corruptedNodeReputation.TotalAuditCount+1, corruptedNodeReputationAfter.TotalAuditCount)
|
||||
require.Less(t, reputationRatio(corruptedNodeReputationAfter), reputationRatio(corruptedNodeReputation))
|
||||
corruptedNodeReputation := nodesReputation[corruptedPiece.StorageNode]
|
||||
corruptedNodeReputationAfter := nodesReputationAfter[corruptedPiece.StorageNode]
|
||||
require.Equal(t, corruptedNodeReputation.TotalAuditCount+1, corruptedNodeReputationAfter.TotalAuditCount)
|
||||
require.Less(t, reputationRatio(corruptedNodeReputationAfter), reputationRatio(corruptedNodeReputation))
|
||||
|
||||
// repair succeeded, so segment should not contain corrupted piece
|
||||
segmentAfter, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
|
||||
for _, piece := range segmentAfter.Pieces {
|
||||
require.NotEqual(t, piece.Number, corruptedPiece.Number, "there should be no corrupted piece in pointer")
|
||||
}
|
||||
})
|
||||
// repair succeeded, so segment should not contain corrupted piece
|
||||
segmentAfter, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
|
||||
for _, piece := range segmentAfter.Pieces {
|
||||
require.NotEqual(t, piece.Number, corruptedPiece.Number, "there should be no corrupted piece in pointer")
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestCorruptDataRepair_Failed does the following:
|
||||
|
Loading…
Reference in New Issue
Block a user