satellite/reputation: if node is DQd or exited skip applying audit

If a node is already disqualified or exited, don't bother going through
the audit reputation code. If there's an additional reputation update to
be made based on the automatic offline audit these nodes would get, the
overlay DB rejects it anyway.

Change-Id: I48ba151aa640b32f87e1285521be465d1f692a78
This commit is contained in:
Cameron 2023-02-17 14:21:54 -05:00 committed by Storj Robot
parent 26f80f8e26
commit 38f94c1c29
2 changed files with 69 additions and 11 deletions

View File

@ -105,6 +105,25 @@ func NewService(log *zap.Logger, overlay *overlay.Service, db DB, config Config)
func (service *Service) ApplyAudit(ctx context.Context, nodeID storj.NodeID, reputation overlay.ReputationStatus, result AuditType) (err error) {
defer mon.Task()(&ctx)(&err)
// There are some cases where the caller did not get updated reputation-status information.
// (Usually this means the node was offline, disqualified, or exited and we skipped creating an order limit for it.)
var nodeExited bool
if reputation.Email == "" {
dossier, err := service.overlay.Get(ctx, nodeID)
if err != nil {
return err
}
reputation = dossier.Reputation.Status
if dossier.ExitStatus.ExitFinishedAt != nil {
nodeExited = true
}
}
// If the node is disqualified or exited, we do not need to apply the audit, so return nil.
if reputation.Disqualified != nil || nodeExited {
return nil
}
now := time.Now()
statusUpdate, err := service.db.Update(ctx, UpdateRequest{
NodeID: nodeID,
@ -115,16 +134,6 @@ func (service *Service) ApplyAudit(ctx context.Context, nodeID storj.NodeID, rep
return err
}
// There are some cases where the caller did not get updated reputation-status information.
// (Usually this means the node was offline and we skipped creating an order limit for it.)
if reputation.Email == "" {
dossier, err := service.overlay.Get(ctx, nodeID)
if err != nil {
return err
}
reputation = dossier.Reputation.Status
}
// only update node if its health status has changed, or it's a newly vetted
// node.
// this prevents the need to require caller of ApplyAudit() to always know
@ -260,7 +269,8 @@ func (service *Service) Close() error { return nil }
// hasReputationChanged determines if the current node reputation is different from the newly updated reputation. This
// function will only consider the Disqualified, UnknownAudiSuspended and OfflineSuspended statuses for changes.
func hasReputationChanged(updated Info, current overlay.ReputationStatus, now time.Time) (changed bool, repChanges []nodeevents.Type) {
if statusChanged(current.Disqualified, updated.Disqualified) {
// there is no unDQ, so only update if changed from nil to not nil
if current.Disqualified == nil && updated.Disqualified != nil {
repChanges = append(repChanges, nodeevents.Disqualified)
changed = true
}

View File

@ -5,6 +5,7 @@ package reputation_test
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@ -190,3 +191,50 @@ func TestDisqualificationAuditFailure(t *testing.T) {
assert.NotNil(t, nodeInfo.Disqualified)
})
}
func TestExitedAndDQNodesGetNoAudit(t *testing.T) {
testplanet.Run(t, testplanet.Config{
SatelliteCount: 1, StorageNodeCount: 3, UplinkCount: 0,
Reconfigure: testplanet.Reconfigure{
Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
config.Reputation.InitialAlpha = 1
config.Reputation.AuditLambda = 1
config.Reputation.AuditWeight = 1
config.Reputation.AuditDQ = 0.4
},
},
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
satel := planet.Satellites[0]
okNode := planet.StorageNodes[0].ID()
dqNode := planet.StorageNodes[1].ID()
exitNode := planet.StorageNodes[2].ID()
// Ok node gets audit
require.NoError(t, satel.Reputation.Service.ApplyAudit(ctx, okNode, overlay.ReputationStatus{}, reputation.AuditOffline))
info, err := satel.Reputation.Service.Get(ctx, okNode)
require.NoError(t, err)
require.Equal(t, int64(1), info.TotalAuditCount)
// DQ node
require.NoError(t, satel.Overlay.Service.DisqualifyNode(ctx, dqNode, overlay.DisqualificationReasonAuditFailure))
require.NoError(t, satel.Reputation.Service.ApplyAudit(ctx, dqNode, overlay.ReputationStatus{}, reputation.AuditOffline))
info, err = satel.Reputation.Service.Get(ctx, dqNode)
require.NoError(t, err)
require.Zero(t, info.TotalAuditCount)
// Exit node
now := time.Now()
_, err = satel.Overlay.DB.UpdateExitStatus(ctx, &overlay.ExitStatusRequest{
NodeID: exitNode,
ExitInitiatedAt: now,
ExitLoopCompletedAt: now,
ExitFinishedAt: now,
ExitSuccess: true,
})
require.NoError(t, err)
require.NoError(t, satel.Reputation.Service.ApplyAudit(ctx, exitNode, overlay.ReputationStatus{}, reputation.AuditOffline))
info, err = satel.Reputation.Service.Get(ctx, exitNode)
require.NoError(t, err)
require.Zero(t, info.TotalAuditCount)
})
}