satellite/audit: move audit metrics out of reporter
Since the reporting logic is now shared between repair and audit, the audit-specific metric reporting needs to be moved out of the reporter. Change-Id: Ib87295ab19079329e7438327d785a7f5c21d3b21
This commit is contained in:
parent
252b78580d
commit
0b500a30e4
@ -35,8 +35,6 @@ type Report struct {
|
||||
Offlines storj.NodeIDList
|
||||
PendingAudits []*PendingAudit
|
||||
Unknown storj.NodeIDList
|
||||
TotalPieces int
|
||||
Completed bool
|
||||
}
|
||||
|
||||
// NewReporter instantiates a reporter.
|
||||
@ -46,7 +44,8 @@ func NewReporter(log *zap.Logger, reputations *reputation.Service, containment C
|
||||
reputations: reputations,
|
||||
containment: containment,
|
||||
maxRetries: maxRetries,
|
||||
maxReverifyCount: maxReverifyCount}
|
||||
maxReverifyCount: maxReverifyCount,
|
||||
}
|
||||
}
|
||||
|
||||
// RecordAudits saves audit results to overlay. When no error, it returns
|
||||
@ -69,9 +68,6 @@ func (reporter *Reporter) RecordAudits(ctx context.Context, req Report) (_ Repor
|
||||
zap.Int("pending", len(pendingAudits)),
|
||||
)
|
||||
|
||||
// record monkit stats for audit result
|
||||
req.recordStats()
|
||||
|
||||
var errlist errs.Group
|
||||
|
||||
tries := 0
|
||||
@ -231,56 +227,3 @@ func (reporter *Reporter) recordPendingAudits(ctx context.Context, pendingAudits
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (report Report) recordStats() {
|
||||
// If an audit was able to complete without auditing any nodes, that means
|
||||
// the segment has been altered.
|
||||
if report.Completed && len(report.Successes) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
totalInSegment := report.TotalPieces
|
||||
numOffline := len(report.Offlines)
|
||||
numSuccessful := len(report.Successes)
|
||||
numFailed := len(report.Fails)
|
||||
numContained := len(report.PendingAudits)
|
||||
numUnknown := len(report.Unknown)
|
||||
|
||||
totalAudited := numSuccessful + numFailed + numOffline + numContained
|
||||
auditedPercentage := float64(totalAudited) / float64(totalInSegment)
|
||||
offlinePercentage := float64(0)
|
||||
successfulPercentage := float64(0)
|
||||
failedPercentage := float64(0)
|
||||
containedPercentage := float64(0)
|
||||
unknownPercentage := float64(0)
|
||||
if totalAudited > 0 {
|
||||
offlinePercentage = float64(numOffline) / float64(totalAudited)
|
||||
successfulPercentage = float64(numSuccessful) / float64(totalAudited)
|
||||
failedPercentage = float64(numFailed) / float64(totalAudited)
|
||||
containedPercentage = float64(numContained) / float64(totalAudited)
|
||||
unknownPercentage = float64(numUnknown) / float64(totalAudited)
|
||||
}
|
||||
|
||||
mon.Meter("audit_success_nodes_global").Mark(numSuccessful) //mon:locked
|
||||
mon.Meter("audit_fail_nodes_global").Mark(numFailed) //mon:locked
|
||||
mon.Meter("audit_offline_nodes_global").Mark(numOffline) //mon:locked
|
||||
mon.Meter("audit_contained_nodes_global").Mark(numContained) //mon:locked
|
||||
mon.Meter("audit_unknown_nodes_global").Mark(numUnknown) //mon:locked
|
||||
mon.Meter("audit_total_nodes_global").Mark(totalAudited) //mon:locked
|
||||
mon.Meter("audit_total_pointer_nodes_global").Mark(totalInSegment) //mon:locked
|
||||
|
||||
mon.IntVal("audit_success_nodes").Observe(int64(numSuccessful)) //mon:locked
|
||||
mon.IntVal("audit_fail_nodes").Observe(int64(numFailed)) //mon:locked
|
||||
mon.IntVal("audit_offline_nodes").Observe(int64(numOffline)) //mon:locked
|
||||
mon.IntVal("audit_contained_nodes").Observe(int64(numContained)) //mon:locked
|
||||
mon.IntVal("audit_unknown_nodes").Observe(int64(numUnknown)) //mon:locked
|
||||
mon.IntVal("audit_total_nodes").Observe(int64(totalAudited)) //mon:locked
|
||||
mon.IntVal("audit_total_pointer_nodes").Observe(int64(totalInSegment)) //mon:locked
|
||||
mon.FloatVal("audited_percentage").Observe(auditedPercentage) //mon:locked
|
||||
mon.FloatVal("audit_offline_percentage").Observe(offlinePercentage) //mon:locked
|
||||
mon.FloatVal("audit_successful_percentage").Observe(successfulPercentage) //mon:locked
|
||||
mon.FloatVal("audit_failed_percentage").Observe(failedPercentage) //mon:locked
|
||||
mon.FloatVal("audit_contained_percentage").Observe(containedPercentage) //mon:locked
|
||||
mon.FloatVal("audit_unknown_percentage").Observe(unknownPercentage) //mon:locked
|
||||
|
||||
}
|
||||
|
@ -89,12 +89,7 @@ func (verifier *Verifier) Verify(ctx context.Context, segment Segment, skip map[
|
||||
|
||||
var segmentInfo metabase.Segment
|
||||
defer func() {
|
||||
if err == nil {
|
||||
report.Completed = true
|
||||
}
|
||||
if len(segmentInfo.Pieces) != 0 {
|
||||
report.TotalPieces = len(segmentInfo.Pieces)
|
||||
}
|
||||
recordStats(report, len(segmentInfo.Pieces), err)
|
||||
}()
|
||||
|
||||
if segment.Expired(verifier.nowFn()) {
|
||||
@ -840,3 +835,54 @@ func GetRandomStripe(ctx context.Context, segment metabase.Segment) (index int32
|
||||
|
||||
return randomStripeIndex, nil
|
||||
}
|
||||
|
||||
func recordStats(report Report, totalPieces int, verifyErr error) {
|
||||
// If an audit was able to complete without auditing any nodes, that means
|
||||
// the segment has been altered.
|
||||
if verifyErr == nil && len(report.Successes) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
numOffline := len(report.Offlines)
|
||||
numSuccessful := len(report.Successes)
|
||||
numFailed := len(report.Fails)
|
||||
numContained := len(report.PendingAudits)
|
||||
numUnknown := len(report.Unknown)
|
||||
|
||||
totalAudited := numSuccessful + numFailed + numOffline + numContained
|
||||
auditedPercentage := float64(totalAudited) / float64(totalPieces)
|
||||
offlinePercentage := float64(0)
|
||||
successfulPercentage := float64(0)
|
||||
failedPercentage := float64(0)
|
||||
containedPercentage := float64(0)
|
||||
unknownPercentage := float64(0)
|
||||
if totalAudited > 0 {
|
||||
offlinePercentage = float64(numOffline) / float64(totalAudited)
|
||||
successfulPercentage = float64(numSuccessful) / float64(totalAudited)
|
||||
failedPercentage = float64(numFailed) / float64(totalAudited)
|
||||
containedPercentage = float64(numContained) / float64(totalAudited)
|
||||
unknownPercentage = float64(numUnknown) / float64(totalAudited)
|
||||
}
|
||||
|
||||
mon.Meter("audit_success_nodes_global").Mark(numSuccessful) //mon:locked
|
||||
mon.Meter("audit_fail_nodes_global").Mark(numFailed) //mon:locked
|
||||
mon.Meter("audit_offline_nodes_global").Mark(numOffline) //mon:locked
|
||||
mon.Meter("audit_contained_nodes_global").Mark(numContained) //mon:locked
|
||||
mon.Meter("audit_unknown_nodes_global").Mark(numUnknown) //mon:locked
|
||||
mon.Meter("audit_total_nodes_global").Mark(totalAudited) //mon:locked
|
||||
mon.Meter("audit_total_pointer_nodes_global").Mark(totalPieces) //mon:locked
|
||||
|
||||
mon.IntVal("audit_success_nodes").Observe(int64(numSuccessful)) //mon:locked
|
||||
mon.IntVal("audit_fail_nodes").Observe(int64(numFailed)) //mon:locked
|
||||
mon.IntVal("audit_offline_nodes").Observe(int64(numOffline)) //mon:locked
|
||||
mon.IntVal("audit_contained_nodes").Observe(int64(numContained)) //mon:locked
|
||||
mon.IntVal("audit_unknown_nodes").Observe(int64(numUnknown)) //mon:locked
|
||||
mon.IntVal("audit_total_nodes").Observe(int64(totalAudited)) //mon:locked
|
||||
mon.IntVal("audit_total_pointer_nodes").Observe(int64(totalPieces)) //mon:locked
|
||||
mon.FloatVal("audited_percentage").Observe(auditedPercentage) //mon:locked
|
||||
mon.FloatVal("audit_offline_percentage").Observe(offlinePercentage) //mon:locked
|
||||
mon.FloatVal("audit_successful_percentage").Observe(successfulPercentage) //mon:locked
|
||||
mon.FloatVal("audit_failed_percentage").Observe(failedPercentage) //mon:locked
|
||||
mon.FloatVal("audit_contained_percentage").Observe(containedPercentage) //mon:locked
|
||||
mon.FloatVal("audit_unknown_percentage").Observe(unknownPercentage) //mon:locked
|
||||
}
|
||||
|
@ -673,8 +673,6 @@ func TestVerifierDeletedSegment(t *testing.T) {
|
||||
// Verify should not return an error, but report should be empty
|
||||
report, err := audits.Verifier.Verify(ctx, segment, nil)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, report.Completed)
|
||||
assert.Zero(t, report.TotalPieces)
|
||||
assert.Zero(t, report.Successes)
|
||||
assert.Zero(t, report.Fails)
|
||||
assert.Zero(t, report.Offlines)
|
||||
@ -726,13 +724,11 @@ func TestVerifierModifiedSegment(t *testing.T) {
|
||||
// Verify should not return an error, but report should be empty
|
||||
report, err := audits.Verifier.Verify(ctx, queueSegment, nil)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, report.Completed)
|
||||
assert.Zero(t, report.Successes)
|
||||
assert.Zero(t, report.Fails)
|
||||
assert.Zero(t, report.Offlines)
|
||||
assert.Zero(t, report.PendingAudits)
|
||||
assert.Zero(t, report.Unknown)
|
||||
assert.Equal(t, report.TotalPieces, len(segment.Pieces))
|
||||
})
|
||||
}
|
||||
|
||||
@ -757,12 +753,6 @@ func TestVerifierReplacedSegment(t *testing.T) {
|
||||
segment, err := queue.Next()
|
||||
require.NoError(t, err)
|
||||
|
||||
segmentInfo, err := satellite.Metabase.DB.GetSegmentByPosition(ctx, metabase.GetSegmentByPosition{
|
||||
StreamID: segment.StreamID,
|
||||
Position: segment.Position,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
audits.Verifier.OnTestingCheckSegmentAlteredHook = func() {
|
||||
// replace the file so that checkIfSegmentAltered fails
|
||||
err := ul.Upload(ctx, satellite, "testbucket", "test/path", testData)
|
||||
@ -772,8 +762,6 @@ func TestVerifierReplacedSegment(t *testing.T) {
|
||||
// Verify should not return an error, but report should be empty
|
||||
report, err := audits.Verifier.Verify(ctx, segment, nil)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, report.Completed)
|
||||
assert.Equal(t, report.TotalPieces, len(segmentInfo.Pieces))
|
||||
assert.Zero(t, report.Successes)
|
||||
assert.Zero(t, report.Fails)
|
||||
assert.Zero(t, report.Offlines)
|
||||
@ -823,9 +811,7 @@ func TestVerifierModifiedSegmentFailsOnce(t *testing.T) {
|
||||
assert.Len(t, report.Successes, origNumPieces-1)
|
||||
assert.Len(t, report.Fails, 1)
|
||||
assert.Equal(t, report.Fails[0], piece.StorageNode)
|
||||
assert.Equal(t, report.TotalPieces, origNumPieces)
|
||||
assert.Len(t, report.Offlines, 0)
|
||||
assert.True(t, report.Completed)
|
||||
require.Len(t, report.PendingAudits, 0)
|
||||
})
|
||||
}
|
||||
@ -882,8 +868,6 @@ func TestVerifierSlowDownload(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.NotContains(t, report.Successes, slowNode.ID())
|
||||
assert.Equal(t, report.TotalPieces, len(segment.Pieces))
|
||||
assert.True(t, report.Completed)
|
||||
assert.Len(t, report.Fails, 0)
|
||||
assert.Len(t, report.Offlines, 0)
|
||||
assert.Len(t, report.Unknown, 0)
|
||||
@ -935,8 +919,6 @@ func TestVerifierUnknownError(t *testing.T) {
|
||||
report, err := audits.Verifier.Verify(ctx, queueSegment, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.True(t, report.Completed)
|
||||
assert.Equal(t, report.TotalPieces, len(segment.Pieces))
|
||||
assert.Len(t, report.Successes, 3)
|
||||
assert.Len(t, report.Fails, 0)
|
||||
assert.Len(t, report.Offlines, 0)
|
||||
|
Loading…
Reference in New Issue
Block a user