satellite/repair: only record audit result if segment can be downloaded
If the satellite can't find enough nodes to successfully download a segment, it is probably not the fault of the storage nodes.

Change-Id: I681f66056df0bb940da9edb3a7dbb3658c0a56cb
parent 3de7f8d5af
commit 336500c04d
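In outline, the commit moves the construction of the audit.Report and the reporter.RecordAudits call from before the download-error check to after the segment reader has been successfully opened, so audit results are recorded only when the satellite actually managed to download the segment. The following is a minimal, self-contained Go sketch of that reordering; the types, the downloadOK flag, and the prints are illustrative stand-ins, not the real satellite/audit APIs.

package main

import "fmt"

// Stand-in types for illustration only; the real definitions live in
// the storj.io/storj satellite audit and repair packages.
type NodeID string

type Piece struct{ StorageNode NodeID }

// piecesReport mirrors the per-category piece lists the repairer
// collects while trying to download a segment.
type piecesReport struct {
	Successful, Failed, Offline, Unknown []Piece
}

// report mirrors audit.Report: node IDs grouped by audit outcome.
type report struct {
	Successes, Fails, Offlines, Unknown []NodeID
}

// buildReport flattens the pieces report into an audit report, as the
// block moved by this commit does.
func buildReport(pr piecesReport) report {
	var r report
	for _, p := range pr.Successful {
		r.Successes = append(r.Successes, p.StorageNode)
	}
	for _, p := range pr.Failed {
		r.Fails = append(r.Fails, p.StorageNode)
	}
	for _, p := range pr.Offline {
		r.Offlines = append(r.Offlines, p.StorageNode)
	}
	for _, p := range pr.Unknown {
		r.Unknown = append(r.Unknown, p.StorageNode)
	}
	return r
}

// repairSegment shows the reordered control flow: the audit report is
// recorded only after the segment download has succeeded. downloadOK
// stands in for the Get phase of the real repairer.
func repairSegment(pr piecesReport, downloadOK bool) {
	if !downloadOK {
		// Download failed, e.g. not enough healthy pieces were found.
		// That is likely not the storage nodes' fault, so no audit
		// results are recorded against them.
		fmt.Println("download failed: skipping audit report")
		return
	}
	// Download succeeded: it is now fair to judge the nodes.
	r := buildReport(pr)
	fmt.Printf("recording audits: %d ok, %d failed, %d offline, %d unknown\n",
		len(r.Successes), len(r.Fails), len(r.Offlines), len(r.Unknown))
}

func main() {
	pr := piecesReport{
		Successful: []Piece{{"node-a"}, {"node-b"}},
		Offline:    []Piece{{"node-c"}},
	}
	repairSegment(pr, false) // before this commit, audits were recorded even here
	repairSegment(pr, true)  // after this commit, only this path records audits
}

The hunks below show the actual change: the tests are updated to expect unchanged reputations when repair fails to download the segment, the report-recording block is removed from the pre-download path, and the same block is re-inserted after the segment reader is successfully obtained.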
@@ -569,27 +569,16 @@ func testFailedDataRepair(t *testing.T, inMemoryRepair bool) {
 		nodesReputationAfter[piece.StorageNode] = *info
 	}
 
-	// repair should update audit status
-	for _, piece := range availablePieces[1 : len(availablePieces)-1] {
+	// repair shouldn't update audit status
+	for _, piece := range availablePieces {
 		successfulNodeReputation := nodesReputation[piece.StorageNode]
 		successfulNodeReputationAfter := nodesReputationAfter[piece.StorageNode]
-		require.Equal(t, successfulNodeReputation.TotalAuditCount+1, successfulNodeReputationAfter.TotalAuditCount)
-		require.Equal(t, successfulNodeReputation.AuditSuccessCount+1, successfulNodeReputationAfter.AuditSuccessCount)
-		require.True(t, successfulNodeReputation.AuditReputationAlpha < successfulNodeReputationAfter.AuditReputationAlpha)
-		require.True(t, successfulNodeReputation.AuditReputationBeta >= successfulNodeReputationAfter.AuditReputationBeta)
+		require.Equal(t, successfulNodeReputation.TotalAuditCount, successfulNodeReputationAfter.TotalAuditCount)
+		require.Equal(t, successfulNodeReputation.AuditSuccessCount, successfulNodeReputationAfter.AuditSuccessCount)
+		require.Equal(t, successfulNodeReputation.AuditReputationAlpha, successfulNodeReputationAfter.AuditReputationAlpha)
+		require.Equal(t, successfulNodeReputation.AuditReputationBeta, successfulNodeReputationAfter.AuditReputationBeta)
 	}
 
-	offlineNodeReputation := nodesReputation[offlinePiece.StorageNode]
-	offlineNodeReputationAfter := nodesReputationAfter[offlinePiece.StorageNode]
-	require.Equal(t, offlineNodeReputation.TotalAuditCount+1, offlineNodeReputationAfter.TotalAuditCount)
-	require.Equal(t, int32(0), offlineNodeReputationAfter.AuditHistory.Windows[0].OnlineCount)
-
-	badNodeReputation := nodesReputation[unknownPiece.StorageNode]
-	badNodeReputationAfter := nodesReputationAfter[unknownPiece.StorageNode]
-	require.Equal(t, badNodeReputation.TotalAuditCount+1, badNodeReputationAfter.TotalAuditCount)
-	require.True(t, badNodeReputation.UnknownAuditReputationBeta < badNodeReputationAfter.UnknownAuditReputationBeta)
-	require.True(t, badNodeReputation.UnknownAuditReputationAlpha >= badNodeReputationAfter.UnknownAuditReputationAlpha)
-
 	// repair should fail, so segment should contain all the original nodes
 	segmentAfter, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
 	for _, piece := range segmentAfter.Pieces {
@@ -1077,22 +1066,16 @@ func testMissingPieceDataRepairFailed(t *testing.T, inMemoryRepair bool) {
 		nodesReputationAfter[piece.StorageNode] = *info
 	}
 
-	// repair should update audit status
+	// repair shouldn't update audit status
 	for _, piece := range successful {
 		successfulNodeReputation := nodesReputation[piece.StorageNode]
 		successfulNodeReputationAfter := nodesReputationAfter[piece.StorageNode]
-		require.Equal(t, successfulNodeReputation.TotalAuditCount+1, successfulNodeReputationAfter.TotalAuditCount)
-		require.Equal(t, successfulNodeReputation.AuditSuccessCount+1, successfulNodeReputationAfter.AuditSuccessCount)
-		require.True(t, successfulNodeReputation.AuditReputationAlpha < successfulNodeReputationAfter.AuditReputationAlpha)
-		require.True(t, successfulNodeReputation.AuditReputationBeta >= successfulNodeReputationAfter.AuditReputationBeta)
+		require.Equal(t, successfulNodeReputation.TotalAuditCount, successfulNodeReputationAfter.TotalAuditCount)
+		require.Equal(t, successfulNodeReputation.AuditSuccessCount, successfulNodeReputationAfter.AuditSuccessCount)
+		require.Equal(t, successfulNodeReputation.AuditReputationAlpha, successfulNodeReputationAfter.AuditReputationAlpha)
+		require.Equal(t, successfulNodeReputation.AuditReputationBeta, successfulNodeReputationAfter.AuditReputationBeta)
 	}
 
-	missingPieceNodeReputation := nodesReputation[missingPiece.StorageNode]
-	missingPieceNodeReputationAfter := nodesReputationAfter[missingPiece.StorageNode]
-	require.Equal(t, missingPieceNodeReputation.TotalAuditCount+1, missingPieceNodeReputationAfter.TotalAuditCount)
-	require.True(t, missingPieceNodeReputation.AuditReputationBeta < missingPieceNodeReputationAfter.AuditReputationBeta)
-	require.True(t, missingPieceNodeReputation.AuditReputationAlpha >= missingPieceNodeReputationAfter.AuditReputationAlpha)
-
 	// repair should fail, so segment should contain all the original nodes
 	segmentAfter, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
 	for _, piece := range segmentAfter.Pieces {
@@ -1325,22 +1308,16 @@ func testCorruptDataRepairFailed(t *testing.T, inMemoryRepair bool) {
 		nodesReputationAfter[piece.StorageNode] = *info
 	}
 
-	// repair should update audit status
+	// repair shouldn't update audit status
 	for _, piece := range successful {
 		successfulNodeReputation := nodesReputation[piece.StorageNode]
 		successfulNodeReputationAfter := nodesReputationAfter[piece.StorageNode]
-		require.Equal(t, successfulNodeReputation.TotalAuditCount+1, successfulNodeReputationAfter.TotalAuditCount)
-		require.Equal(t, successfulNodeReputation.AuditSuccessCount+1, successfulNodeReputationAfter.AuditSuccessCount)
-		require.True(t, successfulNodeReputation.AuditReputationAlpha < successfulNodeReputationAfter.AuditReputationAlpha)
-		require.True(t, successfulNodeReputation.AuditReputationBeta >= successfulNodeReputationAfter.AuditReputationBeta)
+		require.Equal(t, successfulNodeReputation.TotalAuditCount, successfulNodeReputationAfter.TotalAuditCount)
+		require.Equal(t, successfulNodeReputation.AuditSuccessCount, successfulNodeReputationAfter.AuditSuccessCount)
+		require.Equal(t, successfulNodeReputation.AuditReputationAlpha, successfulNodeReputationAfter.AuditReputationAlpha)
+		require.Equal(t, successfulNodeReputation.AuditReputationBeta, successfulNodeReputationAfter.AuditReputationBeta)
 	}
 
-	corruptedNodeReputation := nodesReputation[corruptedPiece.StorageNode]
-	corruptedNodeReputationAfter := nodesReputationAfter[corruptedPiece.StorageNode]
-	require.Equal(t, corruptedNodeReputation.TotalAuditCount+1, corruptedNodeReputationAfter.TotalAuditCount)
-	require.True(t, corruptedNodeReputation.AuditReputationBeta < corruptedNodeReputationAfter.AuditReputationBeta)
-	require.True(t, corruptedNodeReputation.AuditReputationAlpha >= corruptedNodeReputationAfter.AuditReputationAlpha)
-
 	// repair should fail, so segment should contain all the original nodes
 	segmentAfter, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
 	for _, piece := range segmentAfter.Pieces {
@@ -315,24 +315,6 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
 	if len(piecesReport.Contained) > 0 {
 		repairer.log.Debug("unexpected contained pieces during repair", zap.Int("count", len(piecesReport.Contained)))
 	}
-	var report audit.Report
-	for _, piece := range piecesReport.Successful {
-		report.Successes = append(report.Successes, piece.StorageNode)
-	}
-	for _, piece := range piecesReport.Failed {
-		report.Fails = append(report.Fails, piece.StorageNode)
-	}
-	for _, piece := range piecesReport.Offline {
-		report.Offlines = append(report.Offlines, piece.StorageNode)
-	}
-	for _, piece := range piecesReport.Unknown {
-		report.Unknown = append(report.Unknown, piece.StorageNode)
-	}
-	_, reportErr := repairer.reporter.RecordAudits(ctx, report)
-	if reportErr != nil {
-		// failed updates should not affect repair, therefore we will not return the error
-		repairer.log.Debug("failed to record audit", zap.Error(reportErr))
-	}
 
 	if err != nil {
 		// If the context was closed during the Get phase, it will appear here as though
@@ -363,6 +345,26 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
 	}
 	defer func() { err = errs.Combine(err, segmentReader.Close()) }()
 
+	// only report audit result when segment can be successfully downloaded
+	var report audit.Report
+	for _, piece := range piecesReport.Successful {
+		report.Successes = append(report.Successes, piece.StorageNode)
+	}
+	for _, piece := range piecesReport.Failed {
+		report.Fails = append(report.Fails, piece.StorageNode)
+	}
+	for _, piece := range piecesReport.Offline {
+		report.Offlines = append(report.Offlines, piece.StorageNode)
+	}
+	for _, piece := range piecesReport.Unknown {
+		report.Unknown = append(report.Unknown, piece.StorageNode)
+	}
+	_, reportErr := repairer.reporter.RecordAudits(ctx, report)
+	if reportErr != nil {
+		// failed updates should not affect repair, therefore we will not return the error
+		repairer.log.Debug("failed to record audit", zap.Error(reportErr))
+	}
+
 	// Upload the repaired pieces
 	successfulNodes, _, err := repairer.ec.Repair(ctx, putLimits, putPrivateKey, redundancy, segmentReader, repairer.timeout, minSuccessfulNeeded)
 	if err != nil {