satellite/repair: update repair test to use blake3 algo
Update repair test to also use blake3 hashing algorithm

https://github.com/storj/storj/issues/5649

Change-Id: Id8299576f8be4cfd84ddf9a6b852e653628ada72
Parent: 52ff7a66a0
Commit: bc30deee11
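For background on the two algorithms being exercised: SHA-256 and BLAKE3 both satisfy Go's hash.Hash interface, which is what lets a single upload path pick the piece hasher at runtime. A minimal standalone sketch, assuming the github.com/zeebo/blake3 package (the Go BLAKE3 implementation the storj ecosystem depends on); this is an illustration, not code from the commit:

```go
package main

import (
	"crypto/sha256"
	"fmt"
	"hash"

	"github.com/zeebo/blake3" // assumption: the BLAKE3 library used alongside storj
)

func main() {
	pieceData := []byte("example piece data")

	// Both constructors yield a hash.Hash, so the hasher can be chosen
	// per upload, much as the updated test selects one per subtest.
	algos := map[string]func() hash.Hash{
		"SHA256": sha256.New,
		"BLAKE3": func() hash.Hash { return blake3.New() },
	}

	for name, newHash := range algos {
		h := newHash()
		h.Write(pieceData) // hash.Hash writes never fail
		fmt.Printf("%s digest (%d bytes): %x\n", name, h.Size(), h.Sum(nil))
	}
}
```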
```diff
@@ -1064,7 +1064,7 @@ func TestMissingPieceDataRepair(t *testing.T) {
 }
 
 // TestCorruptDataRepair_Succeed does the following:
-// - Uploads test data
+// - Uploads test data using different hash algorithms (Blake3 and SHA256)
 // - Kills some nodes carrying the uploaded segment but keep it above minimum requirement
 // - On one of the remaining nodes, corrupt the piece data being stored by that node
 // - Triggers data repair, which attempts to repair the data from the remaining nodes to
@@ -1074,109 +1074,126 @@ func TestMissingPieceDataRepair(t *testing.T) {
 func TestCorruptDataRepair_Succeed(t *testing.T) {
 	const RepairMaxExcessRateOptimalThreshold = 0.05
 
-	testplanet.Run(t, testplanet.Config{
-		SatelliteCount:   1,
-		StorageNodeCount: 15,
-		UplinkCount:      1,
-		Reconfigure: testplanet.Reconfigure{
-			Satellite: testplanet.Combine(
-				func(log *zap.Logger, index int, config *satellite.Config) {
-					config.Repairer.MaxExcessRateOptimalThreshold = RepairMaxExcessRateOptimalThreshold
-					config.Repairer.InMemoryRepair = true
-					config.Repairer.ReputationUpdateEnabled = true
-					config.Reputation.InitialAlpha = 1
-					config.Reputation.AuditLambda = 0.95
-				},
-				testplanet.ReconfigureRS(3, 4, 9, 9),
-			),
-		},
-	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
-		uplinkPeer := planet.Uplinks[0]
-		satellite := planet.Satellites[0]
-		// stop audit to prevent possible interactions i.e. repair timeout problems
-		satellite.Audit.Worker.Loop.Pause()
-		satellite.RangedLoop.RangedLoop.Service.Loop.Stop()
-		satellite.Repair.Repairer.Loop.Pause()
-
-		var testData = testrand.Bytes(8 * memory.KiB)
-		// first, upload some remote data
-		err := uplinkPeer.Upload(ctx, satellite, "testbucket", "test/path", testData)
-		require.NoError(t, err)
-
-		segment, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
-		require.Equal(t, 9, len(segment.Pieces))
-		require.Equal(t, 3, int(segment.Redundancy.RequiredShares))
-		toKill := 5
-
-		// kill nodes and track lost pieces
-		var availablePieces metabase.Pieces
-
-		for i, piece := range segment.Pieces {
-			if i >= toKill {
-				availablePieces = append(availablePieces, piece)
-				continue
-			}
-
-			err := planet.StopNodeAndUpdate(ctx, planet.FindNode(piece.StorageNode))
-			require.NoError(t, err)
-		}
-		require.Equal(t, 4, len(availablePieces))
-
-		// choose first piece for corruption, for it to always be in the first limiter batch
-		corruptedPiece := availablePieces[0]
-
-		// corrupt piece data
-		corruptedNode := planet.FindNode(corruptedPiece.StorageNode)
-		require.NotNil(t, corruptedNode)
-		corruptedPieceID := segment.RootPieceID.Derive(corruptedPiece.StorageNode, int32(corruptedPiece.Number))
-		corruptPieceData(ctx, t, planet, corruptedNode, corruptedPieceID)
-
-		reputationService := satellite.Repairer.Reputation
-
-		nodesReputation := make(map[storj.NodeID]reputation.Info)
-		for _, piece := range availablePieces {
-			info, err := reputationService.Get(ctx, piece.StorageNode)
-			require.NoError(t, err)
-			nodesReputation[piece.StorageNode] = *info
-		}
-
-		satellite.Repair.Repairer.TestingSetMinFailures(1) // expect one node with bad data
-		// trigger checker with ranged loop to add segment to repair queue
-		_, err = satellite.RangedLoop.RangedLoop.Service.RunOnce(ctx)
-		require.NoError(t, err)
-		satellite.Repair.Repairer.Loop.Restart()
-		satellite.Repair.Repairer.Loop.TriggerWait()
-		satellite.Repair.Repairer.Loop.Pause()
-		satellite.Repair.Repairer.WaitForPendingRepairs()
-
-		nodesReputationAfter := make(map[storj.NodeID]reputation.Info)
-		for _, piece := range availablePieces {
-			info, err := reputationService.Get(ctx, piece.StorageNode)
-			require.NoError(t, err)
-			nodesReputationAfter[piece.StorageNode] = *info
-		}
-
-		// repair should update audit status
-		for _, piece := range availablePieces[1:] {
-			successfulNodeReputation := nodesReputation[piece.StorageNode]
-			successfulNodeReputationAfter := nodesReputationAfter[piece.StorageNode]
-			require.Equal(t, successfulNodeReputation.TotalAuditCount+1, successfulNodeReputationAfter.TotalAuditCount)
-			require.Equal(t, successfulNodeReputation.AuditSuccessCount+1, successfulNodeReputationAfter.AuditSuccessCount)
-			require.GreaterOrEqual(t, reputationRatio(successfulNodeReputationAfter), reputationRatio(successfulNodeReputation))
-		}
-
-		corruptedNodeReputation := nodesReputation[corruptedPiece.StorageNode]
-		corruptedNodeReputationAfter := nodesReputationAfter[corruptedPiece.StorageNode]
-		require.Equal(t, corruptedNodeReputation.TotalAuditCount+1, corruptedNodeReputationAfter.TotalAuditCount)
-		require.Less(t, reputationRatio(corruptedNodeReputationAfter), reputationRatio(corruptedNodeReputation))
-
-		// repair succeeded, so segment should not contain corrupted piece
-		segmentAfter, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
-		for _, piece := range segmentAfter.Pieces {
-			require.NotEqual(t, piece.Number, corruptedPiece.Number, "there should be no corrupted piece in pointer")
-		}
-	})
+	for _, tt := range []struct {
+		name     string
+		hashAlgo pb.PieceHashAlgorithm
+	}{
+		{
+			name:     "BLAKE3",
+			hashAlgo: pb.PieceHashAlgorithm_BLAKE3,
+		},
+		{
+			name:     "SHA256",
+			hashAlgo: pb.PieceHashAlgorithm_SHA256,
+		},
+	} {
+		tt := tt
+		t.Run(tt.name, func(t *testing.T) {
+			testplanet.Run(t, testplanet.Config{
+				SatelliteCount:   1,
+				StorageNodeCount: 15,
+				UplinkCount:      1,
+				Reconfigure: testplanet.Reconfigure{
+					Satellite: testplanet.Combine(
+						func(log *zap.Logger, index int, config *satellite.Config) {
+							config.Repairer.MaxExcessRateOptimalThreshold = RepairMaxExcessRateOptimalThreshold
+							config.Repairer.InMemoryRepair = true
+							config.Repairer.ReputationUpdateEnabled = true
+							config.Reputation.InitialAlpha = 1
+							config.Reputation.AuditLambda = 0.95
+						},
+						testplanet.ReconfigureRS(3, 4, 9, 9),
+					),
+				},
+			}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
+				uplinkPeer := planet.Uplinks[0]
+				satellite := planet.Satellites[0]
+				// stop audit to prevent possible interactions i.e. repair timeout problems
+				satellite.Audit.Worker.Loop.Pause()
+				satellite.RangedLoop.RangedLoop.Service.Loop.Stop()
+				satellite.Repair.Repairer.Loop.Pause()
+
+				var testData = testrand.Bytes(8 * memory.KiB)
+				// first, upload some remote data
+				err := uplinkPeer.Upload(piecestore.WithPieceHashAlgo(ctx, tt.hashAlgo), satellite, "testbucket", "test/path", testData)
+				require.NoError(t, err)
+
+				segment, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
+				require.Equal(t, 9, len(segment.Pieces))
+				require.Equal(t, 3, int(segment.Redundancy.RequiredShares))
+				toKill := 5
+
+				// kill nodes and track lost pieces
+				var availablePieces metabase.Pieces
+
+				for i, piece := range segment.Pieces {
+					if i >= toKill {
+						availablePieces = append(availablePieces, piece)
+						continue
+					}
+
+					err := planet.StopNodeAndUpdate(ctx, planet.FindNode(piece.StorageNode))
+					require.NoError(t, err)
+				}
+				require.Equal(t, 4, len(availablePieces))
+
+				// choose first piece for corruption, for it to always be in the first limiter batch
+				corruptedPiece := availablePieces[0]
+
+				// corrupt piece data
+				corruptedNode := planet.FindNode(corruptedPiece.StorageNode)
+				require.NotNil(t, corruptedNode)
+				corruptedPieceID := segment.RootPieceID.Derive(corruptedPiece.StorageNode, int32(corruptedPiece.Number))
+				corruptPieceData(ctx, t, planet, corruptedNode, corruptedPieceID)
+
+				reputationService := satellite.Repairer.Reputation
+
+				nodesReputation := make(map[storj.NodeID]reputation.Info)
+				for _, piece := range availablePieces {
+					info, err := reputationService.Get(ctx, piece.StorageNode)
+					require.NoError(t, err)
+					nodesReputation[piece.StorageNode] = *info
+				}
+
+				satellite.Repair.Repairer.TestingSetMinFailures(1) // expect one node with bad data
+				// trigger checker with ranged loop to add segment to repair queue
+				_, err = satellite.RangedLoop.RangedLoop.Service.RunOnce(ctx)
+				require.NoError(t, err)
+				satellite.Repair.Repairer.Loop.Restart()
+				satellite.Repair.Repairer.Loop.TriggerWait()
+				satellite.Repair.Repairer.Loop.Pause()
+				satellite.Repair.Repairer.WaitForPendingRepairs()
+
+				nodesReputationAfter := make(map[storj.NodeID]reputation.Info)
+				for _, piece := range availablePieces {
+					info, err := reputationService.Get(ctx, piece.StorageNode)
+					require.NoError(t, err)
+					nodesReputationAfter[piece.StorageNode] = *info
+				}
+
+				// repair should update audit status
+				for _, piece := range availablePieces[1:] {
+					successfulNodeReputation := nodesReputation[piece.StorageNode]
+					successfulNodeReputationAfter := nodesReputationAfter[piece.StorageNode]
+					require.Equal(t, successfulNodeReputation.TotalAuditCount+1, successfulNodeReputationAfter.TotalAuditCount)
+					require.Equal(t, successfulNodeReputation.AuditSuccessCount+1, successfulNodeReputationAfter.AuditSuccessCount)
+					require.GreaterOrEqual(t, reputationRatio(successfulNodeReputationAfter), reputationRatio(successfulNodeReputation))
+				}
+
+				corruptedNodeReputation := nodesReputation[corruptedPiece.StorageNode]
+				corruptedNodeReputationAfter := nodesReputationAfter[corruptedPiece.StorageNode]
+				require.Equal(t, corruptedNodeReputation.TotalAuditCount+1, corruptedNodeReputationAfter.TotalAuditCount)
+				require.Less(t, reputationRatio(corruptedNodeReputationAfter), reputationRatio(corruptedNodeReputation))
+
+				// repair succeeded, so segment should not contain corrupted piece
+				segmentAfter, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
+				for _, piece := range segmentAfter.Pieces {
+					require.NotEqual(t, piece.Number, corruptedPiece.Number, "there should be no corrupted piece in pointer")
+				}
+			})
+		})
+	}
 }
 
 // TestCorruptDataRepair_Failed does the following:
```
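The updated test combines two techniques: table-driven subtests (with `tt := tt` to pin the loop variable, necessary before Go 1.22) and a context value that carries the hash algorithm into the upload path. A self-contained sketch of that pattern follows; `withHashAlgo` and `hashAlgoFrom` are hypothetical stand-ins for `piecestore.WithPieceHashAlgo` and whatever reads it on the other end, not storj's actual implementation:

```go
package example

import (
	"context"
	"crypto/sha256"
	"hash"
	"testing"

	"github.com/zeebo/blake3"
)

// hashAlgoKey is an unexported context key, the usual Go pattern for
// context-scoped options like piecestore.WithPieceHashAlgo.
type hashAlgoKey struct{}

// withHashAlgo and hashAlgoFrom are hypothetical helpers for this sketch.
func withHashAlgo(ctx context.Context, newHash func() hash.Hash) context.Context {
	return context.WithValue(ctx, hashAlgoKey{}, newHash)
}

func hashAlgoFrom(ctx context.Context) func() hash.Hash {
	if newHash, ok := ctx.Value(hashAlgoKey{}).(func() hash.Hash); ok {
		return newHash
	}
	return sha256.New // fall back to the historical default
}

func TestPieceHashAlgorithms(t *testing.T) {
	for _, tt := range []struct {
		name    string
		newHash func() hash.Hash
	}{
		{name: "BLAKE3", newHash: func() hash.Hash { return blake3.New() }},
		{name: "SHA256", newHash: sha256.New},
	} {
		tt := tt // capture the loop variable for the subtest closure
		t.Run(tt.name, func(t *testing.T) {
			ctx := withHashAlgo(context.Background(), tt.newHash)

			// A stand-in for the upload path: pull the algorithm out of
			// the context and hash some piece data with it.
			h := hashAlgoFrom(ctx)()
			h.Write([]byte("piece data"))
			if len(h.Sum(nil)) != h.Size() {
				t.Fatal("digest length should match the algorithm's size")
			}
		})
	}
}
```

Keeping a SHA256 case next to BLAKE3, as the commit does, means the long-standing default stays covered by exactly the same repair assertions.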