60a95d0dc9
When a node's audit history "online score" passes below a configured threshold, the node goes into "offline suspension" mode and begins a review period, where the operator is given an opportunity to bring their node back online. After the review period passes, offline suspension is turned off for the node. In the future, if a node still has a bad online score at the end of the review period, it will be disqualified. This is disabled right now. In the future, if a node is in offline suspension, it will be treated as "unhealthy". Right now, there are no consequences for being in offline suspension. Minor changes: * Moves AuditHistoryConfig out of UpdateStats/BatchUpdateStats args and into UpdateRequest. * Adds "now" argument to UpdateStats/BatchUpdateStats args for easy testing. * Changes formatting strings inside buildUpdateStatement to use specific types. Change-Id: I032b60298840fc16e6ef831da750f2d57619a397
183 lines
7.3 KiB
Go
183 lines
7.3 KiB
Go
// Copyright (C) 2020 Storj Labs, Inc.
|
|
// See LICENSE for copying information.
|
|
|
|
package satellitedb_test
|
|
|
|
import (
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"storj.io/common/testcontext"
|
|
"storj.io/storj/private/testplanet"
|
|
"storj.io/storj/satellite/overlay"
|
|
)
|
|
|
|
func TestUpdateStats(t *testing.T) {
|
|
testplanet.Run(t, testplanet.Config{
|
|
SatelliteCount: 1, StorageNodeCount: 2,
|
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
|
nodeA := planet.StorageNodes[0]
|
|
nodeB := planet.StorageNodes[1]
|
|
nodeA.Contact.Chore.Pause(ctx)
|
|
nodeB.Contact.Chore.Pause(ctx)
|
|
cache := planet.Satellites[0].DB.OverlayCache()
|
|
numAudits := int64(2)
|
|
numUptimes := int64(3)
|
|
|
|
// nodeA: 1 audit, 2 uptime -> unvetted
|
|
updateReq := &overlay.UpdateRequest{
|
|
NodeID: nodeA.ID(),
|
|
AuditOutcome: overlay.AuditFailure,
|
|
IsUp: false,
|
|
AuditsRequiredForVetting: numAudits,
|
|
UptimesRequiredForVetting: numUptimes,
|
|
AuditHistory: testAuditHistoryConfig(),
|
|
}
|
|
nodeStats, err := cache.UpdateStats(ctx, updateReq, time.Now())
|
|
require.NoError(t, err)
|
|
assert.Nil(t, nodeStats.VettedAt)
|
|
assert.EqualValues(t, 1, nodeStats.AuditCount)
|
|
assert.EqualValues(t, 2, nodeStats.UptimeCount)
|
|
|
|
// nodeA: 2 audits, 2 uptimes -> unvetted
|
|
updateReq.NodeID = nodeA.ID()
|
|
updateReq.AuditOutcome = overlay.AuditFailure
|
|
updateReq.IsUp = false
|
|
nodeStats, err = cache.UpdateStats(ctx, updateReq, time.Now())
|
|
require.NoError(t, err)
|
|
assert.Nil(t, nodeStats.VettedAt)
|
|
assert.EqualValues(t, 2, nodeStats.AuditCount)
|
|
assert.EqualValues(t, 2, nodeStats.UptimeCount)
|
|
|
|
// nodeA: 3 audits, 3 uptimes -> vetted
|
|
updateReq.NodeID = nodeA.ID()
|
|
updateReq.AuditOutcome = overlay.AuditSuccess
|
|
updateReq.IsUp = true
|
|
nodeStats, err = cache.UpdateStats(ctx, updateReq, time.Now())
|
|
require.NoError(t, err)
|
|
assert.NotNil(t, nodeStats.VettedAt)
|
|
assert.EqualValues(t, 3, nodeStats.AuditCount)
|
|
assert.EqualValues(t, 3, nodeStats.UptimeCount)
|
|
|
|
// nodeB: 1 audit, 3 uptimes -> unvetted
|
|
updateReq.NodeID = nodeB.ID()
|
|
updateReq.AuditOutcome = overlay.AuditSuccess
|
|
updateReq.IsUp = true
|
|
nodeStats, err = cache.UpdateStats(ctx, updateReq, time.Now())
|
|
require.NoError(t, err)
|
|
assert.Nil(t, nodeStats.VettedAt)
|
|
assert.EqualValues(t, 1, nodeStats.AuditCount)
|
|
assert.EqualValues(t, 3, nodeStats.UptimeCount)
|
|
|
|
// nodeB: 2 audits, 3 uptimes -> vetted
|
|
updateReq.NodeID = nodeB.ID()
|
|
updateReq.AuditOutcome = overlay.AuditFailure
|
|
updateReq.IsUp = false
|
|
nodeStats, err = cache.UpdateStats(ctx, updateReq, time.Now())
|
|
require.NoError(t, err)
|
|
assert.NotNil(t, nodeStats.VettedAt)
|
|
assert.EqualValues(t, 2, nodeStats.AuditCount)
|
|
assert.EqualValues(t, 3, nodeStats.UptimeCount)
|
|
|
|
// Don't overwrite node b's vetted_at timestamp
|
|
updateReq.NodeID = nodeB.ID()
|
|
updateReq.AuditOutcome = overlay.AuditSuccess
|
|
updateReq.IsUp = true
|
|
nodeStats2, err := cache.UpdateStats(ctx, updateReq, time.Now())
|
|
require.NoError(t, err)
|
|
assert.NotNil(t, nodeStats2.VettedAt)
|
|
assert.Equal(t, nodeStats.VettedAt, nodeStats2.VettedAt)
|
|
assert.EqualValues(t, 3, nodeStats2.AuditCount)
|
|
assert.EqualValues(t, 4, nodeStats2.UptimeCount)
|
|
})
|
|
}
|
|
|
|
func TestBatchUpdateStats(t *testing.T) {
|
|
testplanet.Run(t, testplanet.Config{
|
|
SatelliteCount: 1, StorageNodeCount: 2,
|
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
|
nodeA := planet.StorageNodes[0]
|
|
nodeB := planet.StorageNodes[1]
|
|
nodeA.Contact.Chore.Pause(ctx)
|
|
nodeB.Contact.Chore.Pause(ctx)
|
|
cache := planet.Satellites[0].DB.OverlayCache()
|
|
numAudits := int64(2)
|
|
numUptimes := int64(3)
|
|
batchSize := 2
|
|
|
|
// both nodeA and nodeB unvetted
|
|
updateReqA := &overlay.UpdateRequest{NodeID: nodeA.ID(), AuditOutcome: overlay.AuditFailure, IsUp: false, AuditsRequiredForVetting: numAudits, UptimesRequiredForVetting: numUptimes, AuditHistory: testAuditHistoryConfig()}
|
|
updateReqB := &overlay.UpdateRequest{NodeID: nodeB.ID(), AuditOutcome: overlay.AuditSuccess, IsUp: true, AuditsRequiredForVetting: numAudits, UptimesRequiredForVetting: numUptimes, AuditHistory: testAuditHistoryConfig()}
|
|
updateReqs := []*overlay.UpdateRequest{updateReqA, updateReqB}
|
|
failed, err := cache.BatchUpdateStats(ctx, updateReqs, batchSize, time.Now())
|
|
require.NoError(t, err)
|
|
assert.Len(t, failed, 0)
|
|
|
|
nA, err := cache.Get(ctx, nodeA.ID())
|
|
require.NoError(t, err)
|
|
assert.Nil(t, nA.Reputation.VettedAt)
|
|
assert.EqualValues(t, 1, nA.Reputation.AuditCount)
|
|
assert.EqualValues(t, 2, nA.Reputation.UptimeCount)
|
|
|
|
nB, err := cache.Get(ctx, nodeB.ID())
|
|
require.NoError(t, err)
|
|
assert.Nil(t, nB.Reputation.VettedAt)
|
|
assert.EqualValues(t, 1, nB.Reputation.AuditCount)
|
|
assert.EqualValues(t, 3, nB.Reputation.UptimeCount)
|
|
|
|
// nodeA unvetted, nodeB vetted
|
|
updateReqA = &overlay.UpdateRequest{NodeID: nodeA.ID(), AuditOutcome: overlay.AuditFailure, IsUp: false, AuditsRequiredForVetting: numAudits, UptimesRequiredForVetting: numUptimes, AuditHistory: testAuditHistoryConfig()}
|
|
updateReqB = &overlay.UpdateRequest{NodeID: nodeB.ID(), AuditOutcome: overlay.AuditFailure, IsUp: false, AuditsRequiredForVetting: numAudits, UptimesRequiredForVetting: numUptimes, AuditHistory: testAuditHistoryConfig()}
|
|
updateReqs = []*overlay.UpdateRequest{updateReqA, updateReqB}
|
|
failed, err = cache.BatchUpdateStats(ctx, updateReqs, batchSize, time.Now())
|
|
require.NoError(t, err)
|
|
assert.Len(t, failed, 0)
|
|
|
|
nA, err = cache.Get(ctx, nodeA.ID())
|
|
require.NoError(t, err)
|
|
assert.Nil(t, nA.Reputation.VettedAt)
|
|
assert.EqualValues(t, 2, nA.Reputation.AuditCount)
|
|
assert.EqualValues(t, 2, nA.Reputation.UptimeCount)
|
|
|
|
nB, err = cache.Get(ctx, nodeB.ID())
|
|
require.NoError(t, err)
|
|
assert.NotNil(t, nB.Reputation.VettedAt)
|
|
assert.EqualValues(t, 2, nB.Reputation.AuditCount)
|
|
assert.EqualValues(t, 3, nB.Reputation.UptimeCount)
|
|
|
|
// both nodeA and nodeB vetted (don't overwrite timestamp)
|
|
updateReqA = &overlay.UpdateRequest{NodeID: nodeA.ID(), AuditOutcome: overlay.AuditSuccess, IsUp: true, AuditsRequiredForVetting: numAudits, UptimesRequiredForVetting: numUptimes, AuditHistory: testAuditHistoryConfig()}
|
|
updateReqB = &overlay.UpdateRequest{NodeID: nodeB.ID(), AuditOutcome: overlay.AuditSuccess, IsUp: true, AuditsRequiredForVetting: numAudits, UptimesRequiredForVetting: numUptimes, AuditHistory: testAuditHistoryConfig()}
|
|
updateReqs = []*overlay.UpdateRequest{updateReqA, updateReqB}
|
|
failed, err = cache.BatchUpdateStats(ctx, updateReqs, batchSize, time.Now())
|
|
require.NoError(t, err)
|
|
assert.Len(t, failed, 0)
|
|
|
|
nA, err = cache.Get(ctx, nodeA.ID())
|
|
require.NoError(t, err)
|
|
assert.NotNil(t, nA.Reputation.VettedAt)
|
|
assert.EqualValues(t, 3, nA.Reputation.AuditCount)
|
|
assert.EqualValues(t, 3, nA.Reputation.UptimeCount)
|
|
|
|
nB2, err := cache.Get(ctx, nodeB.ID())
|
|
require.NoError(t, err)
|
|
assert.NotNil(t, nB2.Reputation.VettedAt)
|
|
assert.Equal(t, nB.Reputation.VettedAt, nB2.Reputation.VettedAt)
|
|
assert.EqualValues(t, 3, nB2.Reputation.AuditCount)
|
|
assert.EqualValues(t, 4, nB2.Reputation.UptimeCount)
|
|
})
|
|
}
|
|
|
|
// returns an AuditHistoryConfig with sensible test values.
|
|
func testAuditHistoryConfig() overlay.AuditHistoryConfig {
|
|
return overlay.AuditHistoryConfig{
|
|
WindowSize: time.Hour,
|
|
TrackingPeriod: time.Hour,
|
|
GracePeriod: time.Hour,
|
|
OfflineThreshold: 0,
|
|
}
|
|
}
|