// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.

package audit_test

import (
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"go.uber.org/zap"

	"storj.io/common/memory"
	"storj.io/common/storj"
	"storj.io/common/testcontext"
	"storj.io/common/testrand"
	"storj.io/storj/private/testplanet"
	"storj.io/storj/satellite"
	"storj.io/storj/satellite/audit"
	"storj.io/storj/satellite/metabase"
	"storj.io/storj/satellite/overlay"
)

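// TestReportPendingAudits verifies that a pending audit (reverification job) passed to the
// reporter ends up in the containment DB for the audited node.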
func TestReportPendingAudits(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1, StorageNodeCount: 1, UplinkCount: 0,
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		satellite := planet.Satellites[0]
		audits := satellite.Audit
		audits.Worker.Loop.Pause()

		nodeID := planet.StorageNodes[0].ID()

		pending := audit.ReverificationJob{
			Locator: audit.PieceLocator{
				NodeID: nodeID,
			},
		}

		report := audit.Report{PendingAudits: []*audit.ReverificationJob{&pending}}
		containment := satellite.DB.Containment()

		audits.Reporter.RecordAudits(ctx, report)

		pa, err := containment.Get(ctx, nodeID)
		require.NoError(t, err)
		assert.Equal(t, pending.Locator, pa.Locator)
	})
}

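// TestRecordAuditsAtLeastOnce verifies that a successful audit is recorded against the node's
// reputation even when the reporter is configured with zero retries.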
func TestRecordAuditsAtLeastOnce(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1, StorageNodeCount: 1, UplinkCount: 0,
		Reconfigure: testplanet.Reconfigure{
			Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
				// disable reputation write cache so changes are immediate
				config.Reputation.FlushInterval = 0
			},
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		satellite := planet.Satellites[0]
		audits := satellite.Audit
		audits.Worker.Loop.Pause()

		nodeID := planet.StorageNodes[0].ID()

		report := audit.Report{Successes: []storj.NodeID{nodeID}}

		// expect RecordAudits to try recording at least once (maxRetries is set to 0)
		audits.Reporter.RecordAudits(ctx, report)

		service := satellite.Reputation.Service
		node, err := service.Get(ctx, nodeID)
		require.NoError(t, err)
		require.EqualValues(t, 1, node.TotalAuditCount)
	})
}

// TestRecordAuditsCorrectOutcome ensures that audit successes, failures, and unknown audits result in the correct disqualification/suspension state.
func TestRecordAuditsCorrectOutcome(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1, StorageNodeCount: 5, UplinkCount: 0,
		Reconfigure: testplanet.Reconfigure{
			Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
				config.Reputation.InitialAlpha = 1
				config.Reputation.AuditLambda = 0.95
				config.Reputation.AuditDQ = 0.6
			},
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		satellite := planet.Satellites[0]
		audits := satellite.Audit
		audits.Worker.Loop.Pause()

		goodNode := planet.StorageNodes[0].ID()
		dqNode := planet.StorageNodes[1].ID()
		suspendedNode := planet.StorageNodes[2].ID()
		pendingNode := planet.StorageNodes[3].ID()
		offlineNode := planet.StorageNodes[4].ID()

		report := audit.Report{
			Successes: []storj.NodeID{goodNode},
			Fails:     metabase.Pieces{{StorageNode: dqNode}},
			Unknown:   []storj.NodeID{suspendedNode},
			PendingAudits: []*audit.ReverificationJob{
				{
					Locator:       audit.PieceLocator{NodeID: pendingNode},
					ReverifyCount: 0,
				},
			},
			Offlines: []storj.NodeID{offlineNode},
		}

		audits.Reporter.RecordAudits(ctx, report)

		overlay := satellite.Overlay.Service
		node, err := overlay.Get(ctx, goodNode)
		require.NoError(t, err)
		require.Nil(t, node.Disqualified)
		require.Nil(t, node.UnknownAuditSuspended)

		node, err = overlay.Get(ctx, dqNode)
		require.NoError(t, err)
		require.NotNil(t, node.Disqualified)
		require.Nil(t, node.UnknownAuditSuspended)

		node, err = overlay.Get(ctx, suspendedNode)
		require.NoError(t, err)
		require.Nil(t, node.Disqualified)
		require.NotNil(t, node.UnknownAuditSuspended)

		node, err = overlay.Get(ctx, pendingNode)
		require.NoError(t, err)
		require.Nil(t, node.Disqualified)
		require.Nil(t, node.UnknownAuditSuspended)

		node, err = overlay.Get(ctx, offlineNode)
		require.NoError(t, err)
		require.Nil(t, node.Disqualified)
		require.Nil(t, node.UnknownAuditSuspended)
	})
}

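// TestSuspensionTimeNotResetBySuccessiveAudit verifies that reporting a second unknown audit
// does not reset the suspension time of an already suspended node.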
func TestSuspensionTimeNotResetBySuccessiveAudit(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1, StorageNodeCount: 1, UplinkCount: 0,
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		satellite := planet.Satellites[0]
		audits := satellite.Audit
		audits.Worker.Loop.Pause()

		suspendedNode := planet.StorageNodes[0].ID()

		audits.Reporter.RecordAudits(ctx, audit.Report{Unknown: []storj.NodeID{suspendedNode}})

		overlay := satellite.Overlay.Service

		node, err := overlay.Get(ctx, suspendedNode)
		require.NoError(t, err)
		require.Nil(t, node.Disqualified)
		require.NotNil(t, node.UnknownAuditSuspended)

		suspendedAt := node.UnknownAuditSuspended

		audits.Reporter.RecordAudits(ctx, audit.Report{Unknown: []storj.NodeID{suspendedNode}})

		node, err = overlay.Get(ctx, suspendedNode)
		require.NoError(t, err)
		require.Nil(t, node.Disqualified)
		require.NotNil(t, node.UnknownAuditSuspended)
		require.Equal(t, suspendedAt, node.UnknownAuditSuspended)
	})
}

// TestGracefullyExitedNotUpdated verifies that a gracefully exited node's reputation, suspension,
// and disqualification flags are not updated when an audit is reported for that node.
func TestGracefullyExitedNotUpdated(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1, StorageNodeCount: 5, UplinkCount: 0,
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		satellite := planet.Satellites[0]
		audits := satellite.Audit
		audits.Worker.Loop.Pause()
		cache := satellite.Overlay.DB
		reputationDB := satellite.DB.Reputation()

		successNode := planet.StorageNodes[0]
		failedNode := planet.StorageNodes[1]
		containedNode := planet.StorageNodes[2]
		unknownNode := planet.StorageNodes[3]
		offlineNode := planet.StorageNodes[4]
		nodeList := []*testplanet.StorageNode{successNode, failedNode, containedNode, unknownNode, offlineNode}

		report := audit.Report{
			Successes: storj.NodeIDList{successNode.ID(), failedNode.ID(), containedNode.ID(), unknownNode.ID(), offlineNode.ID()},
		}
		audits.Reporter.RecordAudits(ctx, report)

		// mark each node as having gracefully exited
		for _, node := range nodeList {
			req := &overlay.ExitStatusRequest{
				NodeID:              node.ID(),
				ExitInitiatedAt:     time.Now(),
				ExitLoopCompletedAt: time.Now(),
				ExitFinishedAt:      time.Now(),
			}
			_, err := cache.UpdateExitStatus(ctx, req)
			require.NoError(t, err)
		}

		pending := audit.ReverificationJob{
			Locator: audit.PieceLocator{
				NodeID: containedNode.ID(),
			},
		}
		report = audit.Report{
			Successes:     storj.NodeIDList{successNode.ID()},
			Fails:         metabase.Pieces{{StorageNode: failedNode.ID()}},
			Offlines:      storj.NodeIDList{offlineNode.ID()},
			PendingAudits: []*audit.ReverificationJob{&pending},
			Unknown:       storj.NodeIDList{unknownNode.ID()},
		}
		audits.Reporter.RecordAudits(ctx, report)

		// since every node has gracefully exited, reputation, dq, and suspension should remain at default values
		for _, node := range nodeList {
			nodeCacheInfo, err := reputationDB.Get(ctx, node.ID())
			require.NoError(t, err)

			require.Nil(t, nodeCacheInfo.UnknownAuditSuspended)
			require.Nil(t, nodeCacheInfo.Disqualified)
		}
	})
}

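// TestReportOfflineAudits verifies that an offline audit increments the node's total audit
// count without affecting its other reputation stats.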
func TestReportOfflineAudits(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1, StorageNodeCount: 1, UplinkCount: 0,
		Reconfigure: testplanet.Reconfigure{
			Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
				// disable reputation write cache so changes are immediate
				config.Reputation.FlushInterval = 0
			},
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		satellite := planet.Satellites[0]
		node := planet.StorageNodes[0]
		audits := satellite.Audit
		audits.Worker.Loop.Pause()
		reputationService := satellite.Core.Reputation.Service

		audits.Reporter.RecordAudits(ctx, audit.Report{Offlines: storj.NodeIDList{node.ID()}})

		info, err := reputationService.Get(ctx, node.ID())
		require.NoError(t, err)
		require.Equal(t, int64(1), info.TotalAuditCount)

		// check that other reputation stats were not incorrectly updated by offline audit
		require.EqualValues(t, 0, info.AuditSuccessCount)
		require.EqualValues(t, satellite.Config.Reputation.InitialAlpha, info.AuditReputationAlpha)
		require.EqualValues(t, satellite.Config.Reputation.InitialBeta, info.AuditReputationBeta)
		require.EqualValues(t, 1, info.UnknownAuditReputationAlpha)
		require.EqualValues(t, 0, info.UnknownAuditReputationBeta)
	})
}

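// TestReportingAuditFailureResultsInRemovalOfPiece verifies that a piece reported as failed
// is removed from the segment while the remaining pieces keep the segment downloadable.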
func TestReportingAuditFailureResultsInRemovalOfPiece(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1, StorageNodeCount: 6, UplinkCount: 1,
		Reconfigure: testplanet.Reconfigure{
			Satellite: testplanet.Combine(
				func(log *zap.Logger, index int, config *satellite.Config) {
					// disable reputation write cache so changes are immediate
					config.Reputation.FlushInterval = 0
				},
				testplanet.ReconfigureRS(4, 5, 6, 6),
			),
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		satellite := planet.Satellites[0]
		ul := planet.Uplinks[0]

		testData := testrand.Bytes(1 * memory.MiB)
		err := ul.Upload(ctx, satellite, "bucket-for-test", "path/of/testness", testData)
		require.NoError(t, err)

		segment, _ := getRemoteSegment(ctx, t, satellite, ul.Projects[0].ID, "bucket-for-test")

		report := audit.Report{
			Segment: &segment,
			Fails: metabase.Pieces{
				metabase.Piece{
					Number:      segment.Pieces[0].Number,
					StorageNode: segment.Pieces[0].StorageNode,
				},
			},
		}

		satellite.Audit.Reporter.RecordAudits(ctx, report)

		// piece marked as failed is no longer in the segment
		afterSegment, _ := getRemoteSegment(ctx, t, satellite, ul.Projects[0].ID, "bucket-for-test")
		require.Len(t, afterSegment.Pieces, len(segment.Pieces)-1)
		for i, p := range afterSegment.Pieces {
			assert.NotEqual(t, segment.Pieces[0].Number, p.Number, i)
			assert.NotEqual(t, segment.Pieces[0].StorageNode, p.StorageNode, i)
		}

		// segment is still retrievable
		gotData, err := ul.Download(ctx, satellite, "bucket-for-test", "path/of/testness")
		require.NoError(t, err)
		require.Equal(t, testData, gotData)
	})
}