From 3f47d19aa6a781a58b3b3c7d3a45557fdfb2edc3 Mon Sep 17 00:00:00 2001 From: Yaroslav Vorobiov Date: Wed, 27 Oct 2021 13:58:29 +0300 Subject: [PATCH] satellite/overlay: add disqualification reason Add disqualification reason to NodeDossier. Extend DB.DisqualifyNode with disqualification reason. Extend reputation Service.TestDisqualifyNode with disqualification reason. Change-Id: I8611b6340c7f42ac1bb8bd0fd7f0648ad650ab2d --- satellite/accounting/rollup/rollup_test.go | 3 +- satellite/audit/disqualification_test.go | 6 +- satellite/gracefulexit/endpoint.go | 2 +- satellite/gracefulexit/endpoint_test.go | 4 +- satellite/overlay/benchmark_test.go | 2 +- satellite/overlay/db_test.go | 74 ++++++++++++++++++++++ satellite/overlay/service.go | 39 ++++++------ satellite/overlay/service_test.go | 4 +- satellite/overlay/statdb_test.go | 2 +- satellite/repair/repair_test.go | 14 ++-- satellite/reputation/service.go | 8 ++- satellite/satellitedb/overlaycache.go | 28 ++++---- 12 files changed, 135 insertions(+), 51 deletions(-) diff --git a/satellite/accounting/rollup/rollup_test.go b/satellite/accounting/rollup/rollup_test.go index 2ae880c20..fa84be381 100644 --- a/satellite/accounting/rollup/rollup_test.go +++ b/satellite/accounting/rollup/rollup_test.go @@ -18,6 +18,7 @@ import ( "storj.io/storj/private/testplanet" "storj.io/storj/satellite" "storj.io/storj/satellite/orders" + "storj.io/storj/satellite/overlay" ) func TestRollupNoDeletes(t *testing.T) { @@ -366,7 +367,7 @@ func dqNodes(ctx *testcontext.Context, planet *testplanet.Planet) (map[storj.Nod continue } - err := planet.Satellites[0].Overlay.DB.DisqualifyNode(ctx, n.ID()) + err := planet.Satellites[0].Overlay.DB.DisqualifyNode(ctx, n.ID(), time.Now().UTC(), overlay.DisqualificationReasonUnknown) if err != nil { return nil, err } diff --git a/satellite/audit/disqualification_test.go b/satellite/audit/disqualification_test.go index 6f911750e..43fdb1856 100644 --- a/satellite/audit/disqualification_test.go +++ b/satellite/audit/disqualification_test.go @@ -137,7 +137,7 @@ func TestDisqualifiedNodesGetNoDownload(t *testing.T) { segment := segments[0] disqualifiedNode := segment.Pieces[0].StorageNode - err = satellitePeer.Reputation.Service.TestDisqualifyNode(ctx, disqualifiedNode) + err = satellitePeer.Reputation.Service.TestDisqualifyNode(ctx, disqualifiedNode, overlay.DisqualificationReasonUnknown) require.NoError(t, err) limits, _, err := satellitePeer.Orders.Service.CreateGetOrderLimits(ctx, bucket, segment, 0) @@ -170,7 +170,7 @@ func TestDisqualifiedNodesGetNoUpload(t *testing.T) { disqualifiedNode := planet.StorageNodes[0] satellitePeer.Audit.Worker.Loop.Pause() - err := satellitePeer.Reputation.Service.TestDisqualifyNode(ctx, disqualifiedNode.ID()) + err := satellitePeer.Reputation.Service.TestDisqualifyNode(ctx, disqualifiedNode.ID(), overlay.DisqualificationReasonUnknown) require.NoError(t, err) request := overlay.FindStorageNodesRequest{ @@ -213,7 +213,7 @@ func TestDisqualifiedNodeRemainsDisqualified(t *testing.T) { satellitePeer.Audit.Worker.Loop.Pause() disqualifiedNode := planet.StorageNodes[0] - err := satellitePeer.Reputation.Service.TestDisqualifyNode(ctx, disqualifiedNode.ID()) + err := satellitePeer.Reputation.Service.TestDisqualifyNode(ctx, disqualifiedNode.ID(), overlay.DisqualificationReasonUnknown) require.NoError(t, err) info := overlay.NodeCheckInInfo{ diff --git a/satellite/gracefulexit/endpoint.go b/satellite/gracefulexit/endpoint.go index 4ff3e43c6..262b01a3b 100644 --- a/satellite/gracefulexit/endpoint.go +++ b/satellite/gracefulexit/endpoint.go @@ -714,7 +714,7 @@ func (endpoint *Endpoint) getFinishedMessage(ctx context.Context, nodeID storj.N message = &pb.SatelliteMessage{Message: &pb.SatelliteMessage_ExitFailed{ ExitFailed: signed, }} - err = endpoint.overlay.DisqualifyNode(ctx, nodeID) + err = endpoint.overlay.DisqualifyNode(ctx, nodeID, overlay.DisqualificationReasonUnknown) if err != nil { return nil, Error.Wrap(err) } diff --git a/satellite/gracefulexit/endpoint_test.go b/satellite/gracefulexit/endpoint_test.go index fa445cc4f..78ec1a9b5 100644 --- a/satellite/gracefulexit/endpoint_test.go +++ b/satellite/gracefulexit/endpoint_test.go @@ -409,7 +409,7 @@ func TestExitDisqualifiedNodeFailOnStart(t *testing.T) { satellite := planet.Satellites[0] exitingNode := planet.StorageNodes[0] - err := satellite.DB.OverlayCache().DisqualifyNode(ctx, exitingNode.ID()) + err := satellite.DB.OverlayCache().DisqualifyNode(ctx, exitingNode.ID(), time.Now(), overlay.DisqualificationReasonUnknown) require.NoError(t, err) conn, err := exitingNode.Dialer.DialNodeURL(ctx, satellite.NodeURL()) @@ -452,7 +452,7 @@ func TestExitDisqualifiedNodeFailEventually(t *testing.T) { } if !isDisqualified { - err := satellite.DB.OverlayCache().DisqualifyNode(ctx, exitingNode.ID()) + err := satellite.DB.OverlayCache().DisqualifyNode(ctx, exitingNode.ID(), time.Now(), overlay.DisqualificationReasonUnknown) require.NoError(t, err) } diff --git a/satellite/overlay/benchmark_test.go b/satellite/overlay/benchmark_test.go index e9121db7b..719def788 100644 --- a/satellite/overlay/benchmark_test.go +++ b/satellite/overlay/benchmark_test.go @@ -265,7 +265,7 @@ func BenchmarkNodeSelection(b *testing.B) { err := overlaydb.TestSuspendNodeUnknownAudit(ctx, nodeID, now) require.NoError(b, err) case 1: - err := overlaydb.DisqualifyNode(ctx, nodeID) + err := overlaydb.DisqualifyNode(ctx, nodeID, time.Now(), overlay.DisqualificationReasonUnknown) require.NoError(b, err) case 2: err := overlaydb.UpdateCheckIn(ctx, overlay.NodeCheckInInfo{ diff --git a/satellite/overlay/db_test.go b/satellite/overlay/db_test.go index 2933f9790..f4fe4a51b 100644 --- a/satellite/overlay/db_test.go +++ b/satellite/overlay/db_test.go @@ -8,13 +8,18 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "storj.io/common/pb" + "storj.io/common/storj" "storj.io/common/testcontext" + "storj.io/common/testrand" "storj.io/storj/private/testplanet" "storj.io/storj/private/teststorj" + "storj.io/storj/satellite" "storj.io/storj/satellite/overlay" + "storj.io/storj/satellite/satellitedb/satellitedbtest" "storj.io/storj/storagenode" ) @@ -164,3 +169,72 @@ func TestOperatorConfig(t *testing.T) { } }) } + +func TestDBDisqualifyNode(t *testing.T) { + satellitedbtest.Run(t, func(ctx *testcontext.Context, t *testing.T, db satellite.DB) { + overlayDB := db.OverlayCache() + now := time.Now().Truncate(time.Second).UTC() + + cases := []struct { + Name string + NodeID storj.NodeID + DisqualifiedAt time.Time + Reason overlay.DisqualificationReason + }{ + { + Name: "Unknown", + NodeID: testrand.NodeID(), + DisqualifiedAt: now, + Reason: overlay.DisqualificationReasonUnknown, + }, + { + Name: "Audit Failure", + NodeID: testrand.NodeID(), + DisqualifiedAt: now, + Reason: overlay.DisqualificationReasonAuditFailure, + }, + { + Name: "Suspension", + NodeID: testrand.NodeID(), + DisqualifiedAt: now, + Reason: overlay.DisqualificationReasonSuspension, + }, + { + Name: "Node Offline", + NodeID: testrand.NodeID(), + DisqualifiedAt: now, + Reason: overlay.DisqualificationReasonNodeOffline, + }, + } + + for _, testcase := range cases { + t.Run(testcase.Name, func(t *testing.T) { + checkIn := overlay.NodeCheckInInfo{ + NodeID: testcase.NodeID, + Address: &pb.NodeAddress{ + Transport: 1, + Address: "127.0.0.1:0", + }, + IsUp: true, + Version: &pb.NodeVersion{ + Version: "v0.0.0", + CommitHash: "", + Timestamp: now, + }, + } + + err := overlayDB.UpdateCheckIn(ctx, checkIn, now, overlay.NodeSelectionConfig{}) + require.NoError(t, err) + + err = overlayDB.DisqualifyNode(ctx, testcase.NodeID, testcase.DisqualifiedAt, testcase.Reason) + require.NoError(t, err) + + info, err := overlayDB.Get(ctx, testcase.NodeID) + require.NoError(t, err) + require.NotNil(t, info.Disqualified) + assert.Equal(t, testcase.DisqualifiedAt, info.Disqualified.UTC()) + assert.Equal(t, testcase.Reason, *info.DisqualificationReason) + }) + } + }) +} diff --git a/satellite/overlay/service.go b/satellite/overlay/service.go index 0ccf74292..fa394f11d 100644 --- a/satellite/overlay/service.go +++ b/satellite/overlay/service.go @@ -93,7 +93,7 @@ type DB interface { GetNodesNetwork(ctx context.Context, nodeIDs []storj.NodeID) (nodeNets []string, err error) // DisqualifyNode disqualifies a storage node. - DisqualifyNode(ctx context.Context, nodeID storj.NodeID) (err error) + DisqualifyNode(ctx context.Context, nodeID storj.NodeID, disqualifiedAt time.Time, reason DisqualificationReason) (err error) // DQNodesLastSeenBefore disqualifies a limited number of nodes where last_contact_success < cutoff except those already disqualified // or gracefully exited or where last_contact_success = '0001-01-01 00:00:00+00'. @@ -216,22 +216,23 @@ type ExitStatusRequest struct { // NodeDossier is the complete info that the satellite tracks for a storage node. type NodeDossier struct { pb.Node - Type pb.NodeType - Operator pb.NodeOperator - Capacity pb.NodeCapacity - Reputation NodeStats - Version pb.NodeVersion - Contained bool - Disqualified *time.Time - UnknownAuditSuspended *time.Time - OfflineSuspended *time.Time - OfflineUnderReview *time.Time - PieceCount int64 - ExitStatus ExitStatus - CreatedAt time.Time - LastNet string - LastIPPort string - CountryCode location.CountryCode + Type pb.NodeType + Operator pb.NodeOperator + Capacity pb.NodeCapacity + Reputation NodeStats + Version pb.NodeVersion + Contained bool + Disqualified *time.Time + DisqualificationReason *DisqualificationReason + UnknownAuditSuspended *time.Time + OfflineSuspended *time.Time + OfflineUnderReview *time.Time + PieceCount int64 + ExitStatus ExitStatus + CreatedAt time.Time + LastNet string + LastIPPort string + CountryCode location.CountryCode } // NodeStats contains statistics about a node. @@ -641,9 +642,9 @@ func (service *Service) GetReliablePiecesInExcludedCountries(ctx context.Context } // DisqualifyNode disqualifies a storage node. -func (service *Service) DisqualifyNode(ctx context.Context, nodeID storj.NodeID) (err error) { +func (service *Service) DisqualifyNode(ctx context.Context, nodeID storj.NodeID, reason DisqualificationReason) (err error) { defer mon.Task()(&ctx)(&err) - return service.db.DisqualifyNode(ctx, nodeID) + return service.db.DisqualifyNode(ctx, nodeID, time.Now().UTC(), reason) } // ResolveIPAndNetwork resolves the target address and determines its IP and /24 subnet IPv4 or /64 subnet IPv6. diff --git a/satellite/overlay/service_test.go b/satellite/overlay/service_test.go index 50d8108df..f331caf2b 100644 --- a/satellite/overlay/service_test.go +++ b/satellite/overlay/service_test.go @@ -87,7 +87,7 @@ func testCache(ctx context.Context, t *testing.T, store overlay.DB) { err = store.UpdateCheckIn(ctx, d, time.Now().UTC(), nodeSelectionConfig) require.NoError(t, err) // disqualify one node - err = service.DisqualifyNode(ctx, valid3ID) + err = service.DisqualifyNode(ctx, valid3ID, overlay.DisqualificationReasonUnknown) require.NoError(t, err) } @@ -440,7 +440,7 @@ func TestKnownReliable(t *testing.T) { oc := satellite.DB.OverlayCache() // Disqualify storage node #0 - err := oc.DisqualifyNode(ctx, planet.StorageNodes[0].ID()) + err := oc.DisqualifyNode(ctx, planet.StorageNodes[0].ID(), time.Now().UTC(), overlay.DisqualificationReasonUnknown) require.NoError(t, err) // Stop storage node #1 diff --git a/satellite/overlay/statdb_test.go b/satellite/overlay/statdb_test.go index 1566ff09a..0707e2f79 100644 --- a/satellite/overlay/statdb_test.go +++ b/satellite/overlay/statdb_test.go @@ -76,7 +76,7 @@ func testDatabase(ctx context.Context, t *testing.T, cache overlay.DB) { } if tt.disqualified { - err = cache.DisqualifyNode(ctx, tt.nodeID) + err = cache.DisqualifyNode(ctx, tt.nodeID, time.Now(), overlay.DisqualificationReasonUnknown) require.NoError(t, err) } if tt.offline { diff --git a/satellite/repair/repair_test.go b/satellite/repair/repair_test.go index 17fa378cf..678f57f44 100644 --- a/satellite/repair/repair_test.go +++ b/satellite/repair/repair_test.go @@ -138,7 +138,7 @@ func testDataRepair(t *testing.T, inMemoryRepair bool) { for _, node := range planet.StorageNodes { if nodesToDisqualify[node.ID()] { - err := satellite.DB.OverlayCache().DisqualifyNode(ctx, node.ID()) + err := satellite.DB.OverlayCache().DisqualifyNode(ctx, node.ID(), time.Now(), overlay.DisqualificationReasonUnknown) require.NoError(t, err) continue } @@ -305,7 +305,7 @@ func TestDataRepairPendingObject(t *testing.T) { for _, node := range planet.StorageNodes { if nodesToDisqualify[node.ID()] { - err := satellite.DB.OverlayCache().DisqualifyNode(ctx, node.ID()) + err := satellite.DB.OverlayCache().DisqualifyNode(ctx, node.ID(), time.Now(), overlay.DisqualificationReasonUnknown) require.NoError(t, err) continue } @@ -1311,7 +1311,7 @@ func TestRepairExpiredSegment(t *testing.T) { } for nodeID := range nodesToDQ { - err := satellite.DB.OverlayCache().DisqualifyNode(ctx, nodeID) + err := satellite.DB.OverlayCache().DisqualifyNode(ctx, nodeID, time.Now(), overlay.DisqualificationReasonUnknown) require.NoError(t, err) } @@ -1396,7 +1396,7 @@ func TestRemoveDeletedSegmentFromQueue(t *testing.T) { } for nodeID := range nodesToDQ { - err := satellite.DB.OverlayCache().DisqualifyNode(ctx, nodeID) + err := satellite.DB.OverlayCache().DisqualifyNode(ctx, nodeID, time.Now(), overlay.DisqualificationReasonUnknown) require.NoError(t, err) } @@ -1659,7 +1659,7 @@ func TestIrreparableSegmentAccordingToOverlay(t *testing.T) { remotePieces := segment.Pieces for i := 0; i < toDQ; i++ { - err := satellite.DB.OverlayCache().DisqualifyNode(ctx, remotePieces[i].StorageNode) + err := satellite.DB.OverlayCache().DisqualifyNode(ctx, remotePieces[i].StorageNode, time.Now(), overlay.DisqualificationReasonUnknown) require.NoError(t, err) } @@ -1671,7 +1671,7 @@ func TestIrreparableSegmentAccordingToOverlay(t *testing.T) { // Disqualify nodes so that online nodes < minimum threshold // This will make the segment irreparable for _, piece := range remotePieces { - err := satellite.DB.OverlayCache().DisqualifyNode(ctx, piece.StorageNode) + err := satellite.DB.OverlayCache().DisqualifyNode(ctx, piece.StorageNode, time.Now(), overlay.DisqualificationReasonUnknown) require.NoError(t, err) } @@ -1852,7 +1852,7 @@ func TestRepairMultipleDisqualifiedAndSuspended(t *testing.T) { // disqualify and suspend nodes for i := 0; i < toDisqualify; i++ { nodesToDisqualify[remotePieces[i].StorageNode] = true - err := satellite.DB.OverlayCache().DisqualifyNode(ctx, remotePieces[i].StorageNode) + err := satellite.DB.OverlayCache().DisqualifyNode(ctx, remotePieces[i].StorageNode, time.Now(), overlay.DisqualificationReasonUnknown) require.NoError(t, err) } for i := toDisqualify; i < toDisqualify+toSuspend; i++ { diff --git a/satellite/reputation/service.go b/satellite/reputation/service.go index 0ae68c71b..8234f9b5f 100644 --- a/satellite/reputation/service.go +++ b/satellite/reputation/service.go @@ -137,13 +137,15 @@ func (service *Service) TestSuspendNodeUnknownAudit(ctx context.Context, nodeID } // TestDisqualifyNode disqualifies a storage node. -func (service *Service) TestDisqualifyNode(ctx context.Context, nodeID storj.NodeID) (err error) { - err = service.db.DisqualifyNode(ctx, nodeID, time.Now()) +func (service *Service) TestDisqualifyNode(ctx context.Context, nodeID storj.NodeID, reason overlay.DisqualificationReason) (err error) { + disqualifiedAt := time.Now() + + err = service.db.DisqualifyNode(ctx, nodeID, disqualifiedAt) if err != nil { return err } - return service.overlay.DisqualifyNode(ctx, nodeID) + return service.overlay.DisqualifyNode(ctx, nodeID, disqualifiedAt, reason) } // TestUnsuspendNodeUnknownAudit unsuspends a storage node for unknown audits. diff --git a/satellite/satellitedb/overlaycache.go b/satellite/satellitedb/overlaycache.go index 648b73aef..82f3d36df 100644 --- a/satellite/satellitedb/overlaycache.go +++ b/satellite/satellitedb/overlaycache.go @@ -642,10 +642,14 @@ func (cache *overlaycache) UpdateReputation(ctx context.Context, id storj.NodeID updateFields := dbx.Node_Update_Fields{} updateFields.UnknownAuditSuspended = dbx.Node_UnknownAuditSuspended_Raw(request.UnknownAuditSuspended) - updateFields.Disqualified = dbx.Node_Disqualified_Raw(request.Disqualified) updateFields.OfflineSuspended = dbx.Node_OfflineSuspended_Raw(request.OfflineSuspended) updateFields.VettedAt = dbx.Node_VettedAt_Raw(request.VettedAt) + updateFields.Disqualified = dbx.Node_Disqualified_Raw(request.Disqualified) + if request.Disqualified != nil { + updateFields.DisqualificationReason = dbx.Node_DisqualificationReason(int(request.DisqualificationReason)) + } + err = cache.db.UpdateNoReturn_Node_By_Id_And_Disqualified_Is_Null_And_ExitFinishedAt_Is_Null(ctx, dbx.Node_Id(id.Bytes()), updateFields) return Error.Wrap(err) } @@ -696,10 +700,11 @@ func (cache *overlaycache) UpdateNodeInfo(ctx context.Context, nodeID storj.Node } // DisqualifyNode disqualifies a storage node. -func (cache *overlaycache) DisqualifyNode(ctx context.Context, nodeID storj.NodeID) (err error) { +func (cache *overlaycache) DisqualifyNode(ctx context.Context, nodeID storj.NodeID, disqualifiedAt time.Time, reason overlay.DisqualificationReason) (err error) { defer mon.Task()(&ctx)(&err) updateFields := dbx.Node_Update_Fields{} - updateFields.Disqualified = dbx.Node_Disqualified(time.Now().UTC()) + updateFields.Disqualified = dbx.Node_Disqualified(disqualifiedAt.UTC()) + updateFields.DisqualificationReason = dbx.Node_DisqualificationReason(int(reason)) dbNode, err := cache.db.Update_Node_By_Id(ctx, dbx.Node_Id(nodeID.Bytes()), updateFields) if err != nil { @@ -1062,14 +1067,15 @@ func convertDBNode(ctx context.Context, info *dbx.Node) (_ *overlay.NodeDossier, Timestamp: info.Timestamp, Release: info.Release, }, - Disqualified: info.Disqualified, - UnknownAuditSuspended: info.UnknownAuditSuspended, - OfflineSuspended: info.OfflineSuspended, - OfflineUnderReview: info.UnderReview, - PieceCount: info.PieceCount, - ExitStatus: exitStatus, - CreatedAt: info.CreatedAt, - LastNet: info.LastNet, + Disqualified: info.Disqualified, + DisqualificationReason: (*overlay.DisqualificationReason)(info.DisqualificationReason), + UnknownAuditSuspended: info.UnknownAuditSuspended, + OfflineSuspended: info.OfflineSuspended, + OfflineUnderReview: info.UnderReview, + PieceCount: info.PieceCount, + ExitStatus: exitStatus, + CreatedAt: info.CreatedAt, + LastNet: info.LastNet, } if info.LastIpPort != nil { node.LastIPPort = *info.LastIpPort