satellite/overlay: insert DQ node events for stray nodes
Change-Id: I99da11e506ab7f6bcebdb08a5815078a3297c932
This commit is contained in:
parent
74ddfab810
commit
cb0c359b81
@ -263,7 +263,7 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB,
|
||||
})
|
||||
|
||||
if config.StrayNodes.EnableDQ {
|
||||
peer.Overlay.DQStrayNodes = straynodes.NewChore(peer.Log.Named("overlay:dq-stray-nodes"), peer.Overlay.DB, config.StrayNodes)
|
||||
peer.Overlay.DQStrayNodes = straynodes.NewChore(peer.Log.Named("overlay:dq-stray-nodes"), peer.Overlay.Service, config.StrayNodes)
|
||||
peer.Services.Add(lifecycle.Item{
|
||||
Name: "overlay:dq-stray-nodes",
|
||||
Run: peer.Overlay.DQStrayNodes.Run,
|
||||
|
@ -63,7 +63,7 @@ func TestDQNodesLastSeenBefore(t *testing.T) {
|
||||
// check that limit works, set limit = 1
|
||||
// run twice to DQ nodes 1 and 2
|
||||
for i := 0; i < 2; i++ {
|
||||
n, err := cache.DQNodesLastSeenBefore(ctx, time.Now(), 1)
|
||||
_, n, err := cache.DQNodesLastSeenBefore(ctx, time.Now(), 1)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, n, 1)
|
||||
}
|
||||
@ -81,7 +81,7 @@ func TestDQNodesLastSeenBefore(t *testing.T) {
|
||||
// there should be no more nodes for DQ
|
||||
// use higher limit to double check that DQ times
|
||||
// for nodes 1 and 2 have not changed
|
||||
n, err := cache.DQNodesLastSeenBefore(ctx, time.Now(), 100)
|
||||
_, n, err := cache.DQNodesLastSeenBefore(ctx, time.Now(), 100)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, n, 0)
|
||||
|
||||
|
@ -100,7 +100,7 @@ type DB interface {
|
||||
|
||||
// DQNodesLastSeenBefore disqualifies a limited number of nodes where last_contact_success < cutoff except those already disqualified
|
||||
// or gracefully exited or where last_contact_success = '0001-01-01 00:00:00+00'.
|
||||
DQNodesLastSeenBefore(ctx context.Context, cutoff time.Time, limit int) (count int, err error)
|
||||
DQNodesLastSeenBefore(ctx context.Context, cutoff time.Time, limit int) (nodeEmails map[storj.NodeID]string, count int, err error)
|
||||
|
||||
// TestSuspendNodeUnknownAudit suspends a storage node for unknown audits.
|
||||
TestSuspendNodeUnknownAudit(ctx context.Context, nodeID storj.NodeID, suspendedAt time.Time) (err error)
|
||||
@ -700,6 +700,25 @@ func (service *Service) GetReliablePiecesInExcludedCountries(ctx context.Context
|
||||
return piecesInExcluded, nil
|
||||
}
|
||||
|
||||
// DQNodesLastSeenBefore disqualifies nodes who have not been contacted since the cutoff time.
|
||||
func (service *Service) DQNodesLastSeenBefore(ctx context.Context, cutoff time.Time, limit int) (count int, err error) {
|
||||
defer mon.Task()(&ctx)(&err)
|
||||
|
||||
nodes, count, err := service.db.DQNodesLastSeenBefore(ctx, cutoff, limit)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if service.config.SendNodeEmails {
|
||||
for nodeID, email := range nodes {
|
||||
_, err = service.nodeEvents.Insert(ctx, email, nodeID, nodeevents.Disqualified)
|
||||
if err != nil {
|
||||
service.log.Error("could not insert node disqualified into node events", zap.Error(err))
|
||||
}
|
||||
}
|
||||
}
|
||||
return count, err
|
||||
}
|
||||
|
||||
// DisqualifyNode disqualifies a storage node.
|
||||
func (service *Service) DisqualifyNode(ctx context.Context, nodeID storj.NodeID, reason DisqualificationReason) (err error) {
|
||||
defer mon.Task()(&ctx)(&err)
|
||||
|
@ -27,14 +27,14 @@ type Config struct {
|
||||
// Chore disqualifies stray nodes.
|
||||
type Chore struct {
|
||||
log *zap.Logger
|
||||
cache overlay.DB
|
||||
cache *overlay.Service
|
||||
maxDurationWithoutContact time.Duration
|
||||
limit int
|
||||
Loop *sync2.Cycle
|
||||
}
|
||||
|
||||
// NewChore creates a new stray nodes Chore.
|
||||
func NewChore(log *zap.Logger, cache overlay.DB, config Config) *Chore {
|
||||
func NewChore(log *zap.Logger, cache *overlay.Service, config Config) *Chore {
|
||||
return &Chore{
|
||||
log: log,
|
||||
cache: cache,
|
||||
|
@ -14,6 +14,7 @@ import (
|
||||
"storj.io/common/testcontext"
|
||||
"storj.io/storj/private/testplanet"
|
||||
"storj.io/storj/satellite"
|
||||
"storj.io/storj/satellite/nodeevents"
|
||||
"storj.io/storj/satellite/overlay"
|
||||
)
|
||||
|
||||
@ -23,6 +24,7 @@ func TestDQStrayNodes(t *testing.T) {
|
||||
Reconfigure: testplanet.Reconfigure{
|
||||
Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
|
||||
config.StrayNodes.MaxDurationWithoutContact = 24 * time.Hour
|
||||
config.Overlay.SendNodeEmails = true
|
||||
},
|
||||
},
|
||||
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
||||
@ -33,7 +35,7 @@ func TestDQStrayNodes(t *testing.T) {
|
||||
sat.Overlay.DQStrayNodes.Loop.Pause()
|
||||
|
||||
cache := planet.Satellites[0].Overlay.DB
|
||||
|
||||
email := "test@storj.test"
|
||||
strayInfo, err := cache.Get(ctx, strayNode.ID())
|
||||
require.NoError(t, err)
|
||||
require.Nil(t, strayInfo.Disqualified)
|
||||
@ -50,6 +52,9 @@ func TestDQStrayNodes(t *testing.T) {
|
||||
Timestamp: time.Time{},
|
||||
Release: false,
|
||||
},
|
||||
Operator: &pb.NodeOperator{
|
||||
Email: email,
|
||||
},
|
||||
}
|
||||
|
||||
// set strayNode last_contact_success to 48 hours ago
|
||||
@ -57,6 +62,11 @@ func TestDQStrayNodes(t *testing.T) {
|
||||
|
||||
sat.Overlay.DQStrayNodes.Loop.TriggerWait()
|
||||
|
||||
ne, err := sat.DB.NodeEvents().GetLatestByEmailAndEvent(ctx, email, nodeevents.Disqualified)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, email, ne.Email)
|
||||
require.Equal(t, strayNode.ID(), ne.NodeID)
|
||||
|
||||
strayInfo, err = cache.Get(ctx, strayNode.ID())
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, strayInfo.Disqualified)
|
||||
|
@ -1131,20 +1131,21 @@ func getNodeStats(dbNode *dbx.Node) *overlay.NodeStats {
|
||||
|
||||
// DQNodesLastSeenBefore disqualifies a limited number of nodes where last_contact_success < cutoff except those already disqualified
|
||||
// or gracefully exited or where last_contact_success = '0001-01-01 00:00:00+00'.
|
||||
func (cache *overlaycache) DQNodesLastSeenBefore(ctx context.Context, cutoff time.Time, limit int) (count int, err error) {
|
||||
func (cache *overlaycache) DQNodesLastSeenBefore(ctx context.Context, cutoff time.Time, limit int) (nodeEmails map[storj.NodeID]string, count int, err error) {
|
||||
defer mon.Task()(&ctx)(&err)
|
||||
|
||||
var nodeIDs []storj.NodeID
|
||||
nodeEmails = make(map[storj.NodeID]string)
|
||||
for {
|
||||
nodeIDs, err = cache.getNodesForDQLastSeenBefore(ctx, cutoff, limit)
|
||||
if err != nil {
|
||||
if cockroachutil.NeedsRetry(err) {
|
||||
continue
|
||||
}
|
||||
return 0, err
|
||||
return nil, 0, err
|
||||
}
|
||||
if len(nodeIDs) == 0 {
|
||||
return 0, nil
|
||||
return nil, 0, nil
|
||||
}
|
||||
break
|
||||
}
|
||||
@ -1159,28 +1160,29 @@ func (cache *overlaycache) DQNodesLastSeenBefore(ctx context.Context, cutoff tim
|
||||
AND exit_finished_at IS NULL
|
||||
AND last_contact_success < $2
|
||||
AND last_contact_success != '0001-01-01 00:00:00+00'::timestamptz
|
||||
RETURNING id, last_contact_success;
|
||||
RETURNING id, email, last_contact_success;
|
||||
`), pgutil.NodeIDArray(nodeIDs), cutoff, overlay.DisqualificationReasonNodeOffline)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
return nil, 0, err
|
||||
}
|
||||
defer func() { err = errs.Combine(err, rows.Close()) }()
|
||||
|
||||
for rows.Next() {
|
||||
var id storj.NodeID
|
||||
var email string
|
||||
var lcs time.Time
|
||||
err = rows.Scan(&id, &lcs)
|
||||
err = rows.Scan(&id, &email, &lcs)
|
||||
if err != nil {
|
||||
return count, err
|
||||
return nil, count, err
|
||||
}
|
||||
cache.db.log.Info("Disqualified",
|
||||
zap.String("DQ type", "stray node"),
|
||||
zap.Stringer("Node ID", id),
|
||||
zap.Stringer("Last contacted", lcs))
|
||||
|
||||
nodeEmails[id] = email
|
||||
count++
|
||||
}
|
||||
return count, rows.Err()
|
||||
return nodeEmails, count, rows.Err()
|
||||
}
|
||||
|
||||
func (cache *overlaycache) getNodesForDQLastSeenBefore(ctx context.Context, cutoff time.Time, limit int) (nodes []storj.NodeID, err error) {
|
||||
|
Loading…
Reference in New Issue
Block a user