satellite/repair: only record audit result if segment can be downloaded
If satellite can't find enough nodes to successfully download a segment, it probably is not the fault of storage nodes. Change-Id: I681f66056df0bb940da9edb3a7dbb3658c0a56cb
This commit is contained in:
parent
3de7f8d5af
commit
336500c04d
@ -569,27 +569,16 @@ func testFailedDataRepair(t *testing.T, inMemoryRepair bool) {
|
|||||||
nodesReputationAfter[piece.StorageNode] = *info
|
nodesReputationAfter[piece.StorageNode] = *info
|
||||||
}
|
}
|
||||||
|
|
||||||
// repair should update audit status
|
// repair shouldn't update audit status
|
||||||
for _, piece := range availablePieces[1 : len(availablePieces)-1] {
|
for _, piece := range availablePieces {
|
||||||
successfulNodeReputation := nodesReputation[piece.StorageNode]
|
successfulNodeReputation := nodesReputation[piece.StorageNode]
|
||||||
successfulNodeReputationAfter := nodesReputationAfter[piece.StorageNode]
|
successfulNodeReputationAfter := nodesReputationAfter[piece.StorageNode]
|
||||||
require.Equal(t, successfulNodeReputation.TotalAuditCount+1, successfulNodeReputationAfter.TotalAuditCount)
|
require.Equal(t, successfulNodeReputation.TotalAuditCount, successfulNodeReputationAfter.TotalAuditCount)
|
||||||
require.Equal(t, successfulNodeReputation.AuditSuccessCount+1, successfulNodeReputationAfter.AuditSuccessCount)
|
require.Equal(t, successfulNodeReputation.AuditSuccessCount, successfulNodeReputationAfter.AuditSuccessCount)
|
||||||
require.True(t, successfulNodeReputation.AuditReputationAlpha < successfulNodeReputationAfter.AuditReputationAlpha)
|
require.Equal(t, successfulNodeReputation.AuditReputationAlpha, successfulNodeReputationAfter.AuditReputationAlpha)
|
||||||
require.True(t, successfulNodeReputation.AuditReputationBeta >= successfulNodeReputationAfter.AuditReputationBeta)
|
require.Equal(t, successfulNodeReputation.AuditReputationBeta, successfulNodeReputationAfter.AuditReputationBeta)
|
||||||
}
|
}
|
||||||
|
|
||||||
offlineNodeReputation := nodesReputation[offlinePiece.StorageNode]
|
|
||||||
offlineNodeReputationAfter := nodesReputationAfter[offlinePiece.StorageNode]
|
|
||||||
require.Equal(t, offlineNodeReputation.TotalAuditCount+1, offlineNodeReputationAfter.TotalAuditCount)
|
|
||||||
require.Equal(t, int32(0), offlineNodeReputationAfter.AuditHistory.Windows[0].OnlineCount)
|
|
||||||
|
|
||||||
badNodeReputation := nodesReputation[unknownPiece.StorageNode]
|
|
||||||
badNodeReputationAfter := nodesReputationAfter[unknownPiece.StorageNode]
|
|
||||||
require.Equal(t, badNodeReputation.TotalAuditCount+1, badNodeReputationAfter.TotalAuditCount)
|
|
||||||
require.True(t, badNodeReputation.UnknownAuditReputationBeta < badNodeReputationAfter.UnknownAuditReputationBeta)
|
|
||||||
require.True(t, badNodeReputation.UnknownAuditReputationAlpha >= badNodeReputationAfter.UnknownAuditReputationAlpha)
|
|
||||||
|
|
||||||
// repair should fail, so segment should contain all the original nodes
|
// repair should fail, so segment should contain all the original nodes
|
||||||
segmentAfter, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
|
segmentAfter, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
|
||||||
for _, piece := range segmentAfter.Pieces {
|
for _, piece := range segmentAfter.Pieces {
|
||||||
@ -1077,22 +1066,16 @@ func testMissingPieceDataRepairFailed(t *testing.T, inMemoryRepair bool) {
|
|||||||
nodesReputationAfter[piece.StorageNode] = *info
|
nodesReputationAfter[piece.StorageNode] = *info
|
||||||
}
|
}
|
||||||
|
|
||||||
// repair should update audit status
|
// repair shouldn't update audit status
|
||||||
for _, piece := range successful {
|
for _, piece := range successful {
|
||||||
successfulNodeReputation := nodesReputation[piece.StorageNode]
|
successfulNodeReputation := nodesReputation[piece.StorageNode]
|
||||||
successfulNodeReputationAfter := nodesReputationAfter[piece.StorageNode]
|
successfulNodeReputationAfter := nodesReputationAfter[piece.StorageNode]
|
||||||
require.Equal(t, successfulNodeReputation.TotalAuditCount+1, successfulNodeReputationAfter.TotalAuditCount)
|
require.Equal(t, successfulNodeReputation.TotalAuditCount, successfulNodeReputationAfter.TotalAuditCount)
|
||||||
require.Equal(t, successfulNodeReputation.AuditSuccessCount+1, successfulNodeReputationAfter.AuditSuccessCount)
|
require.Equal(t, successfulNodeReputation.AuditSuccessCount, successfulNodeReputationAfter.AuditSuccessCount)
|
||||||
require.True(t, successfulNodeReputation.AuditReputationAlpha < successfulNodeReputationAfter.AuditReputationAlpha)
|
require.Equal(t, successfulNodeReputation.AuditReputationAlpha, successfulNodeReputationAfter.AuditReputationAlpha)
|
||||||
require.True(t, successfulNodeReputation.AuditReputationBeta >= successfulNodeReputationAfter.AuditReputationBeta)
|
require.Equal(t, successfulNodeReputation.AuditReputationBeta, successfulNodeReputationAfter.AuditReputationBeta)
|
||||||
}
|
}
|
||||||
|
|
||||||
missingPieceNodeReputation := nodesReputation[missingPiece.StorageNode]
|
|
||||||
missingPieceNodeReputationAfter := nodesReputationAfter[missingPiece.StorageNode]
|
|
||||||
require.Equal(t, missingPieceNodeReputation.TotalAuditCount+1, missingPieceNodeReputationAfter.TotalAuditCount)
|
|
||||||
require.True(t, missingPieceNodeReputation.AuditReputationBeta < missingPieceNodeReputationAfter.AuditReputationBeta)
|
|
||||||
require.True(t, missingPieceNodeReputation.AuditReputationAlpha >= missingPieceNodeReputationAfter.AuditReputationAlpha)
|
|
||||||
|
|
||||||
// repair should fail, so segment should contain all the original nodes
|
// repair should fail, so segment should contain all the original nodes
|
||||||
segmentAfter, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
|
segmentAfter, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
|
||||||
for _, piece := range segmentAfter.Pieces {
|
for _, piece := range segmentAfter.Pieces {
|
||||||
@ -1325,22 +1308,16 @@ func testCorruptDataRepairFailed(t *testing.T, inMemoryRepair bool) {
|
|||||||
nodesReputationAfter[piece.StorageNode] = *info
|
nodesReputationAfter[piece.StorageNode] = *info
|
||||||
}
|
}
|
||||||
|
|
||||||
// repair should update audit status
|
// repair shouldn't update audit status
|
||||||
for _, piece := range successful {
|
for _, piece := range successful {
|
||||||
successfulNodeReputation := nodesReputation[piece.StorageNode]
|
successfulNodeReputation := nodesReputation[piece.StorageNode]
|
||||||
successfulNodeReputationAfter := nodesReputationAfter[piece.StorageNode]
|
successfulNodeReputationAfter := nodesReputationAfter[piece.StorageNode]
|
||||||
require.Equal(t, successfulNodeReputation.TotalAuditCount+1, successfulNodeReputationAfter.TotalAuditCount)
|
require.Equal(t, successfulNodeReputation.TotalAuditCount, successfulNodeReputationAfter.TotalAuditCount)
|
||||||
require.Equal(t, successfulNodeReputation.AuditSuccessCount+1, successfulNodeReputationAfter.AuditSuccessCount)
|
require.Equal(t, successfulNodeReputation.AuditSuccessCount, successfulNodeReputationAfter.AuditSuccessCount)
|
||||||
require.True(t, successfulNodeReputation.AuditReputationAlpha < successfulNodeReputationAfter.AuditReputationAlpha)
|
require.Equal(t, successfulNodeReputation.AuditReputationAlpha, successfulNodeReputationAfter.AuditReputationAlpha)
|
||||||
require.True(t, successfulNodeReputation.AuditReputationBeta >= successfulNodeReputationAfter.AuditReputationBeta)
|
require.Equal(t, successfulNodeReputation.AuditReputationBeta, successfulNodeReputationAfter.AuditReputationBeta)
|
||||||
}
|
}
|
||||||
|
|
||||||
corruptedNodeReputation := nodesReputation[corruptedPiece.StorageNode]
|
|
||||||
corruptedNodeReputationAfter := nodesReputationAfter[corruptedPiece.StorageNode]
|
|
||||||
require.Equal(t, corruptedNodeReputation.TotalAuditCount+1, corruptedNodeReputationAfter.TotalAuditCount)
|
|
||||||
require.True(t, corruptedNodeReputation.AuditReputationBeta < corruptedNodeReputationAfter.AuditReputationBeta)
|
|
||||||
require.True(t, corruptedNodeReputation.AuditReputationAlpha >= corruptedNodeReputationAfter.AuditReputationAlpha)
|
|
||||||
|
|
||||||
// repair should fail, so segment should contain all the original nodes
|
// repair should fail, so segment should contain all the original nodes
|
||||||
segmentAfter, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
|
segmentAfter, _ := getRemoteSegment(ctx, t, satellite, planet.Uplinks[0].Projects[0].ID, "testbucket")
|
||||||
for _, piece := range segmentAfter.Pieces {
|
for _, piece := range segmentAfter.Pieces {
|
||||||
|
@ -315,24 +315,6 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
|
|||||||
if len(piecesReport.Contained) > 0 {
|
if len(piecesReport.Contained) > 0 {
|
||||||
repairer.log.Debug("unexpected contained pieces during repair", zap.Int("count", len(piecesReport.Contained)))
|
repairer.log.Debug("unexpected contained pieces during repair", zap.Int("count", len(piecesReport.Contained)))
|
||||||
}
|
}
|
||||||
var report audit.Report
|
|
||||||
for _, piece := range piecesReport.Successful {
|
|
||||||
report.Successes = append(report.Successes, piece.StorageNode)
|
|
||||||
}
|
|
||||||
for _, piece := range piecesReport.Failed {
|
|
||||||
report.Fails = append(report.Fails, piece.StorageNode)
|
|
||||||
}
|
|
||||||
for _, piece := range piecesReport.Offline {
|
|
||||||
report.Offlines = append(report.Offlines, piece.StorageNode)
|
|
||||||
}
|
|
||||||
for _, piece := range piecesReport.Unknown {
|
|
||||||
report.Unknown = append(report.Unknown, piece.StorageNode)
|
|
||||||
}
|
|
||||||
_, reportErr := repairer.reporter.RecordAudits(ctx, report)
|
|
||||||
if reportErr != nil {
|
|
||||||
// failed updates should not affect repair, therefore we will not return the error
|
|
||||||
repairer.log.Debug("failed to record audit", zap.Error(reportErr))
|
|
||||||
}
|
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// If the context was closed during the Get phase, it will appear here as though
|
// If the context was closed during the Get phase, it will appear here as though
|
||||||
@ -363,6 +345,26 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
|
|||||||
}
|
}
|
||||||
defer func() { err = errs.Combine(err, segmentReader.Close()) }()
|
defer func() { err = errs.Combine(err, segmentReader.Close()) }()
|
||||||
|
|
||||||
|
// only report audit result when segment can be successfully downloaded
|
||||||
|
var report audit.Report
|
||||||
|
for _, piece := range piecesReport.Successful {
|
||||||
|
report.Successes = append(report.Successes, piece.StorageNode)
|
||||||
|
}
|
||||||
|
for _, piece := range piecesReport.Failed {
|
||||||
|
report.Fails = append(report.Fails, piece.StorageNode)
|
||||||
|
}
|
||||||
|
for _, piece := range piecesReport.Offline {
|
||||||
|
report.Offlines = append(report.Offlines, piece.StorageNode)
|
||||||
|
}
|
||||||
|
for _, piece := range piecesReport.Unknown {
|
||||||
|
report.Unknown = append(report.Unknown, piece.StorageNode)
|
||||||
|
}
|
||||||
|
_, reportErr := repairer.reporter.RecordAudits(ctx, report)
|
||||||
|
if reportErr != nil {
|
||||||
|
// failed updates should not affect repair, therefore we will not return the error
|
||||||
|
repairer.log.Debug("failed to record audit", zap.Error(reportErr))
|
||||||
|
}
|
||||||
|
|
||||||
// Upload the repaired pieces
|
// Upload the repaired pieces
|
||||||
successfulNodes, _, err := repairer.ec.Repair(ctx, putLimits, putPrivateKey, redundancy, segmentReader, repairer.timeout, minSuccessfulNeeded)
|
successfulNodes, _, err := repairer.ec.Repair(ctx, putLimits, putPrivateKey, redundancy, segmentReader, repairer.timeout, minSuccessfulNeeded)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
Loading…
Reference in New Issue
Block a user