satellite/audit: move audit metrics out of reporter

Since we are sharing the reporting logic between repair and audit, we
need to move the metric reporting logic out of the reporter.

Change-Id: Ib87295ab19079329e7438327d785a7f5c21d3b21
This commit is contained in:
Yingrong Zhao 2021-09-15 16:31:33 -04:00 committed by Yingrong Zhao
parent 252b78580d
commit 0b500a30e4
3 changed files with 54 additions and 83 deletions

View File

@ -35,8 +35,6 @@ type Report struct {
Offlines storj.NodeIDList
PendingAudits []*PendingAudit
Unknown storj.NodeIDList
TotalPieces int
Completed bool
}
// NewReporter instantiates a reporter.
@ -46,7 +44,8 @@ func NewReporter(log *zap.Logger, reputations *reputation.Service, containment C
reputations: reputations,
containment: containment,
maxRetries: maxRetries,
maxReverifyCount: maxReverifyCount}
maxReverifyCount: maxReverifyCount,
}
}
// RecordAudits saves audit results to overlay. When no error, it returns
@ -69,9 +68,6 @@ func (reporter *Reporter) RecordAudits(ctx context.Context, req Report) (_ Repor
zap.Int("pending", len(pendingAudits)),
)
// record monkit stats for audit result
req.recordStats()
var errlist errs.Group
tries := 0
@ -231,56 +227,3 @@ func (reporter *Reporter) recordPendingAudits(ctx context.Context, pendingAudits
}
return nil, nil
}
// recordStats emits the monkit meters and value distributions describing the
// audit outcome carried by report: per-outcome node counts, the number of
// pieces in the segment, and the corresponding ratios.
//
// Nothing is recorded when the report is marked Completed but contains no
// successes.
func (report Report) recordStats() {
	// If an audit was able to complete without auditing any nodes, that means
	// the segment has been altered.
	if report.Completed && len(report.Successes) == 0 {
		return
	}

	totalInSegment := report.TotalPieces
	numOffline := len(report.Offlines)
	numSuccessful := len(report.Successes)
	numFailed := len(report.Fails)
	numContained := len(report.PendingAudits)
	numUnknown := len(report.Unknown)

	// Nodes that returned an unknown error are not counted as audited.
	totalAudited := numSuccessful + numFailed + numOffline + numContained

	// TotalPieces may be zero on a report that is not Completed. Floating-point
	// division by zero does not panic in Go; it yields NaN (0/0) or +Inf (n/0),
	// which would be fed into the distribution. Report 0 instead.
	auditedPercentage := float64(0)
	if totalInSegment > 0 {
		auditedPercentage = float64(totalAudited) / float64(totalInSegment)
	}

	offlinePercentage := float64(0)
	successfulPercentage := float64(0)
	failedPercentage := float64(0)
	containedPercentage := float64(0)
	unknownPercentage := float64(0)
	if totalAudited > 0 {
		offlinePercentage = float64(numOffline) / float64(totalAudited)
		successfulPercentage = float64(numSuccessful) / float64(totalAudited)
		failedPercentage = float64(numFailed) / float64(totalAudited)
		containedPercentage = float64(numContained) / float64(totalAudited)
		unknownPercentage = float64(numUnknown) / float64(totalAudited)
	}

	mon.Meter("audit_success_nodes_global").Mark(numSuccessful)        //mon:locked
	mon.Meter("audit_fail_nodes_global").Mark(numFailed)               //mon:locked
	mon.Meter("audit_offline_nodes_global").Mark(numOffline)           //mon:locked
	mon.Meter("audit_contained_nodes_global").Mark(numContained)       //mon:locked
	mon.Meter("audit_unknown_nodes_global").Mark(numUnknown)           //mon:locked
	mon.Meter("audit_total_nodes_global").Mark(totalAudited)           //mon:locked
	mon.Meter("audit_total_pointer_nodes_global").Mark(totalInSegment) //mon:locked

	mon.IntVal("audit_success_nodes").Observe(int64(numSuccessful))        //mon:locked
	mon.IntVal("audit_fail_nodes").Observe(int64(numFailed))               //mon:locked
	mon.IntVal("audit_offline_nodes").Observe(int64(numOffline))           //mon:locked
	mon.IntVal("audit_contained_nodes").Observe(int64(numContained))       //mon:locked
	mon.IntVal("audit_unknown_nodes").Observe(int64(numUnknown))           //mon:locked
	mon.IntVal("audit_total_nodes").Observe(int64(totalAudited))           //mon:locked
	mon.IntVal("audit_total_pointer_nodes").Observe(int64(totalInSegment)) //mon:locked

	mon.FloatVal("audited_percentage").Observe(auditedPercentage)             //mon:locked
	mon.FloatVal("audit_offline_percentage").Observe(offlinePercentage)       //mon:locked
	mon.FloatVal("audit_successful_percentage").Observe(successfulPercentage) //mon:locked
	mon.FloatVal("audit_failed_percentage").Observe(failedPercentage)         //mon:locked
	mon.FloatVal("audit_contained_percentage").Observe(containedPercentage)   //mon:locked
	mon.FloatVal("audit_unknown_percentage").Observe(unknownPercentage)       //mon:locked
}

View File

@ -89,12 +89,7 @@ func (verifier *Verifier) Verify(ctx context.Context, segment Segment, skip map[
var segmentInfo metabase.Segment
defer func() {
if err == nil {
report.Completed = true
}
if len(segmentInfo.Pieces) != 0 {
report.TotalPieces = len(segmentInfo.Pieces)
}
recordStats(report, len(segmentInfo.Pieces), err)
}()
if segment.Expired(verifier.nowFn()) {
@ -840,3 +835,54 @@ func GetRandomStripe(ctx context.Context, segment metabase.Segment) (index int32
return randomStripeIndex, nil
}
// recordStats emits the monkit meters and value distributions describing the
// outcome of one segment verification.
//
// report carries the per-outcome node lists, totalPieces is the number of
// pieces in the audited segment, and verifyErr is the error the verification
// finished with (nil on success). Nothing is recorded when verifyErr is nil
// and the report contains no successes.
func recordStats(report Report, totalPieces int, verifyErr error) {
	// If an audit was able to complete without auditing any nodes, that means
	// the segment has been altered.
	if verifyErr == nil && len(report.Successes) == 0 {
		return
	}

	numOffline := len(report.Offlines)
	numSuccessful := len(report.Successes)
	numFailed := len(report.Fails)
	numContained := len(report.PendingAudits)
	numUnknown := len(report.Unknown)

	// Nodes that returned an unknown error are not counted as audited.
	totalAudited := numSuccessful + numFailed + numOffline + numContained

	// totalPieces may be zero. Floating-point division by zero does not panic
	// in Go; it yields NaN (0/0) or +Inf (n/0), which would be fed into the
	// distribution. Report 0 instead.
	auditedPercentage := float64(0)
	if totalPieces > 0 {
		auditedPercentage = float64(totalAudited) / float64(totalPieces)
	}

	offlinePercentage := float64(0)
	successfulPercentage := float64(0)
	failedPercentage := float64(0)
	containedPercentage := float64(0)
	unknownPercentage := float64(0)
	if totalAudited > 0 {
		offlinePercentage = float64(numOffline) / float64(totalAudited)
		successfulPercentage = float64(numSuccessful) / float64(totalAudited)
		failedPercentage = float64(numFailed) / float64(totalAudited)
		containedPercentage = float64(numContained) / float64(totalAudited)
		unknownPercentage = float64(numUnknown) / float64(totalAudited)
	}

	mon.Meter("audit_success_nodes_global").Mark(numSuccessful)     //mon:locked
	mon.Meter("audit_fail_nodes_global").Mark(numFailed)            //mon:locked
	mon.Meter("audit_offline_nodes_global").Mark(numOffline)        //mon:locked
	mon.Meter("audit_contained_nodes_global").Mark(numContained)    //mon:locked
	mon.Meter("audit_unknown_nodes_global").Mark(numUnknown)        //mon:locked
	mon.Meter("audit_total_nodes_global").Mark(totalAudited)        //mon:locked
	mon.Meter("audit_total_pointer_nodes_global").Mark(totalPieces) //mon:locked

	mon.IntVal("audit_success_nodes").Observe(int64(numSuccessful))     //mon:locked
	mon.IntVal("audit_fail_nodes").Observe(int64(numFailed))            //mon:locked
	mon.IntVal("audit_offline_nodes").Observe(int64(numOffline))        //mon:locked
	mon.IntVal("audit_contained_nodes").Observe(int64(numContained))    //mon:locked
	mon.IntVal("audit_unknown_nodes").Observe(int64(numUnknown))        //mon:locked
	mon.IntVal("audit_total_nodes").Observe(int64(totalAudited))        //mon:locked
	mon.IntVal("audit_total_pointer_nodes").Observe(int64(totalPieces)) //mon:locked

	mon.FloatVal("audited_percentage").Observe(auditedPercentage)             //mon:locked
	mon.FloatVal("audit_offline_percentage").Observe(offlinePercentage)       //mon:locked
	mon.FloatVal("audit_successful_percentage").Observe(successfulPercentage) //mon:locked
	mon.FloatVal("audit_failed_percentage").Observe(failedPercentage)         //mon:locked
	mon.FloatVal("audit_contained_percentage").Observe(containedPercentage)   //mon:locked
	mon.FloatVal("audit_unknown_percentage").Observe(unknownPercentage)       //mon:locked
}

View File

@ -673,8 +673,6 @@ func TestVerifierDeletedSegment(t *testing.T) {
// Verify should not return an error, but report should be empty
report, err := audits.Verifier.Verify(ctx, segment, nil)
require.NoError(t, err)
assert.True(t, report.Completed)
assert.Zero(t, report.TotalPieces)
assert.Zero(t, report.Successes)
assert.Zero(t, report.Fails)
assert.Zero(t, report.Offlines)
@ -726,13 +724,11 @@ func TestVerifierModifiedSegment(t *testing.T) {
// Verify should not return an error, but report should be empty
report, err := audits.Verifier.Verify(ctx, queueSegment, nil)
require.NoError(t, err)
assert.True(t, report.Completed)
assert.Zero(t, report.Successes)
assert.Zero(t, report.Fails)
assert.Zero(t, report.Offlines)
assert.Zero(t, report.PendingAudits)
assert.Zero(t, report.Unknown)
assert.Equal(t, report.TotalPieces, len(segment.Pieces))
})
}
@ -757,12 +753,6 @@ func TestVerifierReplacedSegment(t *testing.T) {
segment, err := queue.Next()
require.NoError(t, err)
segmentInfo, err := satellite.Metabase.DB.GetSegmentByPosition(ctx, metabase.GetSegmentByPosition{
StreamID: segment.StreamID,
Position: segment.Position,
})
require.NoError(t, err)
audits.Verifier.OnTestingCheckSegmentAlteredHook = func() {
// replace the file so that checkIfSegmentAltered fails
err := ul.Upload(ctx, satellite, "testbucket", "test/path", testData)
@ -772,8 +762,6 @@ func TestVerifierReplacedSegment(t *testing.T) {
// Verify should not return an error, but report should be empty
report, err := audits.Verifier.Verify(ctx, segment, nil)
require.NoError(t, err)
assert.True(t, report.Completed)
assert.Equal(t, report.TotalPieces, len(segmentInfo.Pieces))
assert.Zero(t, report.Successes)
assert.Zero(t, report.Fails)
assert.Zero(t, report.Offlines)
@ -823,9 +811,7 @@ func TestVerifierModifiedSegmentFailsOnce(t *testing.T) {
assert.Len(t, report.Successes, origNumPieces-1)
assert.Len(t, report.Fails, 1)
assert.Equal(t, report.Fails[0], piece.StorageNode)
assert.Equal(t, report.TotalPieces, origNumPieces)
assert.Len(t, report.Offlines, 0)
assert.True(t, report.Completed)
require.Len(t, report.PendingAudits, 0)
})
}
@ -882,8 +868,6 @@ func TestVerifierSlowDownload(t *testing.T) {
require.NoError(t, err)
assert.NotContains(t, report.Successes, slowNode.ID())
assert.Equal(t, report.TotalPieces, len(segment.Pieces))
assert.True(t, report.Completed)
assert.Len(t, report.Fails, 0)
assert.Len(t, report.Offlines, 0)
assert.Len(t, report.Unknown, 0)
@ -935,8 +919,6 @@ func TestVerifierUnknownError(t *testing.T) {
report, err := audits.Verifier.Verify(ctx, queueSegment, nil)
require.NoError(t, err)
assert.True(t, report.Completed)
assert.Equal(t, report.TotalPieces, len(segment.Pieces))
assert.Len(t, report.Successes, 3)
assert.Len(t, report.Fails, 0)
assert.Len(t, report.Offlines, 0)