2020-04-14 21:50:02 +01:00
|
|
|
// Copyright (C) 2019 Storj Labs, Inc.
|
|
|
|
// See LICENSE for copying information.
|
|
|
|
|
|
|
|
package overlay_test
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2023-06-30 11:13:18 +01:00
|
|
|
"fmt"
|
|
|
|
"math/rand"
|
2020-04-14 21:50:02 +01:00
|
|
|
"strconv"
|
2023-06-30 11:13:18 +01:00
|
|
|
"strings"
|
2020-04-14 21:50:02 +01:00
|
|
|
"sync"
|
|
|
|
"testing"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"go.uber.org/zap"
|
|
|
|
"golang.org/x/sync/errgroup"
|
|
|
|
|
2023-06-30 11:13:18 +01:00
|
|
|
"storj.io/common/identity/testidentity"
|
2020-04-14 21:50:02 +01:00
|
|
|
"storj.io/common/memory"
|
|
|
|
"storj.io/common/pb"
|
|
|
|
"storj.io/common/storj"
|
2023-06-30 11:13:18 +01:00
|
|
|
"storj.io/common/storj/location"
|
2020-04-14 21:50:02 +01:00
|
|
|
"storj.io/common/sync2"
|
|
|
|
"storj.io/common/testcontext"
|
2020-05-06 18:22:54 +01:00
|
|
|
"storj.io/common/testrand"
|
2022-02-25 16:53:24 +00:00
|
|
|
"storj.io/storj/private/testplanet"
|
2020-04-14 21:50:02 +01:00
|
|
|
"storj.io/storj/satellite"
|
2023-06-30 11:35:07 +01:00
|
|
|
"storj.io/storj/satellite/nodeselection/uploadselection"
|
2020-04-14 21:50:02 +01:00
|
|
|
"storj.io/storj/satellite/overlay"
|
|
|
|
"storj.io/storj/satellite/satellitedb/satellitedbtest"
|
|
|
|
)
|
|
|
|
|
2020-05-06 18:22:54 +01:00
|
|
|
var nodeSelectionConfig = overlay.NodeSelectionConfig{
|
2020-04-14 21:50:02 +01:00
|
|
|
NewNodeFraction: 0.2,
|
|
|
|
MinimumVersion: "v1.0.0",
|
|
|
|
OnlineWindow: 4 * time.Hour,
|
|
|
|
DistinctIP: true,
|
|
|
|
MinimumDiskSpace: 100 * memory.MiB,
|
2021-06-15 11:49:56 +01:00
|
|
|
|
|
|
|
AsOfSystemTime: overlay.AsOfSystemTimeConfig{
|
|
|
|
Enabled: true,
|
|
|
|
DefaultInterval: -time.Microsecond,
|
|
|
|
},
|
2020-04-14 21:50:02 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Staleness is how stale the cache may be before it syncs with the database
// to refresh itself.
const (
	// lowStaleness is so short that the cache is forced to refresh every time
	// it is used.
	lowStaleness time.Duration = 2 * time.Nanosecond

	// highStaleness is long enough that the cache is only refreshed when it
	// has not been refreshed within the past hour.
	highStaleness time.Duration = 60 * time.Minute
)
|
|
|
|
|
|
|
|
func TestRefresh(t *testing.T) {
|
|
|
|
satellitedbtest.Run(t, func(ctx *testcontext.Context, t *testing.T, db satellite.DB) {
|
2022-06-28 12:53:39 +01:00
|
|
|
cache, err := overlay.NewUploadSelectionCache(zap.NewNop(),
|
2020-04-14 21:50:02 +01:00
|
|
|
db.OverlayCache(),
|
|
|
|
lowStaleness,
|
2020-05-06 18:22:54 +01:00
|
|
|
nodeSelectionConfig,
|
2023-06-30 11:13:18 +01:00
|
|
|
uploadselection.NodeFilters{},
|
|
|
|
overlay.NewPlacementRules().CreateFilters,
|
2020-04-14 21:50:02 +01:00
|
|
|
)
|
2022-06-28 12:53:39 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
cacheCtx, cacheCancel := context.WithCancel(ctx)
|
|
|
|
defer cacheCancel()
|
|
|
|
ctx.Go(func() error { return cache.Run(cacheCtx) })
|
|
|
|
|
2020-04-14 21:50:02 +01:00
|
|
|
// the cache should have no nodes to start
|
2022-06-28 12:53:39 +01:00
|
|
|
err = cache.Refresh(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
reputable, new, err := cache.Size(ctx)
|
2020-04-14 21:50:02 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, 0, reputable)
|
|
|
|
require.Equal(t, 0, new)
|
|
|
|
|
|
|
|
// add some nodes to the database
|
|
|
|
const nodeCount = 2
|
2020-07-08 15:28:49 +01:00
|
|
|
addNodesToNodesTable(ctx, t, db.OverlayCache(), nodeCount, 0)
|
2020-04-14 21:50:02 +01:00
|
|
|
|
|
|
|
// confirm nodes are in the cache once
|
|
|
|
err = cache.Refresh(ctx)
|
|
|
|
require.NoError(t, err)
|
2022-06-28 12:53:39 +01:00
|
|
|
reputable, new, err = cache.Size(ctx)
|
|
|
|
require.NoError(t, err)
|
2020-04-14 21:50:02 +01:00
|
|
|
require.Equal(t, 2, new)
|
|
|
|
require.Equal(t, 0, reputable)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2021-01-28 14:33:53 +00:00
|
|
|
func addNodesToNodesTable(ctx context.Context, t *testing.T, db overlay.DB, count, makeReputable int) (ids []storj.NodeID) {
|
2020-04-14 21:50:02 +01:00
|
|
|
for i := 0; i < count; i++ {
|
|
|
|
subnet := strconv.Itoa(i) + ".1.2"
|
|
|
|
addr := subnet + ".3:8080"
|
|
|
|
n := overlay.NodeCheckInInfo{
|
|
|
|
NodeID: storj.NodeID{byte(i)},
|
|
|
|
Address: &pb.NodeAddress{
|
2023-01-24 15:59:47 +00:00
|
|
|
Address: addr,
|
2020-04-14 21:50:02 +01:00
|
|
|
},
|
|
|
|
LastNet: subnet,
|
|
|
|
LastIPPort: addr,
|
|
|
|
IsUp: true,
|
|
|
|
Capacity: &pb.NodeCapacity{
|
2020-07-08 15:28:49 +01:00
|
|
|
FreeDisk: 200 * memory.MiB.Int64(),
|
2020-04-14 21:50:02 +01:00
|
|
|
},
|
|
|
|
Version: &pb.NodeVersion{
|
|
|
|
Version: "v1.1.0",
|
|
|
|
CommitHash: "",
|
|
|
|
Timestamp: time.Time{},
|
|
|
|
Release: true,
|
|
|
|
},
|
|
|
|
}
|
2020-05-06 18:22:54 +01:00
|
|
|
err := db.UpdateCheckIn(ctx, n, time.Now().UTC(), nodeSelectionConfig)
|
2020-04-14 21:50:02 +01:00
|
|
|
require.NoError(t, err)
|
2020-04-24 17:11:04 +01:00
|
|
|
|
2020-07-08 15:28:49 +01:00
|
|
|
// make designated nodes reputable
|
|
|
|
if i < makeReputable {
|
2021-07-07 20:20:23 +01:00
|
|
|
vettedAt, err := db.TestVetNode(ctx, storj.NodeID{byte(i)})
|
2020-04-24 17:11:04 +01:00
|
|
|
require.NoError(t, err)
|
2021-07-07 20:20:23 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
require.NotNil(t, vettedAt)
|
2021-01-28 14:33:53 +00:00
|
|
|
ids = append(ids, storj.NodeID{byte(i)})
|
2020-04-24 17:11:04 +01:00
|
|
|
}
|
2020-04-14 21:50:02 +01:00
|
|
|
}
|
2021-01-28 14:33:53 +00:00
|
|
|
return ids
|
2020-04-14 21:50:02 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
type mockdb struct {
|
|
|
|
mu sync.Mutex
|
|
|
|
callCount int
|
2023-06-30 11:35:07 +01:00
|
|
|
reputable []*uploadselection.SelectedNode
|
|
|
|
new []*uploadselection.SelectedNode
|
2020-04-14 21:50:02 +01:00
|
|
|
}
|
|
|
|
|
2023-06-30 11:35:07 +01:00
|
|
|
func (m *mockdb) SelectAllStorageNodesUpload(ctx context.Context, selectionCfg overlay.NodeSelectionConfig) (reputable, new []*uploadselection.SelectedNode, err error) {
|
2020-04-14 21:50:02 +01:00
|
|
|
m.mu.Lock()
|
|
|
|
defer m.mu.Unlock()
|
|
|
|
sync2.Sleep(ctx, 500*time.Millisecond)
|
|
|
|
m.callCount++
|
2020-05-06 18:00:07 +01:00
|
|
|
|
2023-06-30 11:35:07 +01:00
|
|
|
reputable = make([]*uploadselection.SelectedNode, len(m.reputable))
|
2020-05-06 18:00:07 +01:00
|
|
|
for i, n := range m.reputable {
|
|
|
|
reputable[i] = n.Clone()
|
|
|
|
}
|
2023-06-30 11:35:07 +01:00
|
|
|
new = make([]*uploadselection.SelectedNode, len(m.new))
|
2020-05-06 18:00:07 +01:00
|
|
|
for i, n := range m.new {
|
|
|
|
new[i] = n.Clone()
|
|
|
|
}
|
|
|
|
|
|
|
|
return reputable, new, nil
|
2020-04-14 21:50:02 +01:00
|
|
|
}
|
2020-05-06 18:00:07 +01:00
|
|
|
|
2020-04-14 21:50:02 +01:00
|
|
|
func TestRefreshConcurrent(t *testing.T) {
|
|
|
|
ctx := testcontext.New(t)
|
|
|
|
defer ctx.Cleanup()
|
|
|
|
|
|
|
|
// concurrent cache.Refresh with high staleness, where high staleness means the
|
|
|
|
// cache should only be refreshed the first time we call cache.Refresh
|
|
|
|
mockDB := mockdb{}
|
2022-06-28 12:53:39 +01:00
|
|
|
cache, err := overlay.NewUploadSelectionCache(zap.NewNop(),
|
2020-04-14 21:50:02 +01:00
|
|
|
&mockDB,
|
|
|
|
highStaleness,
|
2020-05-06 18:22:54 +01:00
|
|
|
nodeSelectionConfig,
|
2023-06-30 11:13:18 +01:00
|
|
|
uploadselection.NodeFilters{},
|
|
|
|
overlay.NewPlacementRules().CreateFilters,
|
2020-04-14 21:50:02 +01:00
|
|
|
)
|
2022-06-28 12:53:39 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
cacheCtx, cacheCancel := context.WithCancel(ctx)
|
|
|
|
defer cacheCancel()
|
|
|
|
ctx.Go(func() error { return cache.Run(cacheCtx) })
|
2020-04-14 21:50:02 +01:00
|
|
|
|
|
|
|
var group errgroup.Group
|
|
|
|
group.Go(func() error {
|
|
|
|
return cache.Refresh(ctx)
|
|
|
|
})
|
|
|
|
group.Go(func() error {
|
|
|
|
return cache.Refresh(ctx)
|
|
|
|
})
|
2022-06-28 12:53:39 +01:00
|
|
|
require.NoError(t, group.Wait())
|
2020-04-14 21:50:02 +01:00
|
|
|
|
|
|
|
require.Equal(t, 1, mockDB.callCount)
|
|
|
|
|
|
|
|
// concurrent cache.Refresh with low staleness, where low staleness
|
|
|
|
// means that the cache will refresh *every time* cache.Refresh is called
|
|
|
|
mockDB = mockdb{}
|
2022-06-28 12:53:39 +01:00
|
|
|
cache, err = overlay.NewUploadSelectionCache(zap.NewNop(),
|
2020-04-14 21:50:02 +01:00
|
|
|
&mockDB,
|
|
|
|
lowStaleness,
|
2020-05-06 18:22:54 +01:00
|
|
|
nodeSelectionConfig,
|
2023-06-30 11:13:18 +01:00
|
|
|
uploadselection.NodeFilters{},
|
|
|
|
overlay.NewPlacementRules().CreateFilters,
|
2020-04-14 21:50:02 +01:00
|
|
|
)
|
2022-06-28 12:53:39 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
ctx.Go(func() error { return cache.Run(cacheCtx) })
|
2020-04-14 21:50:02 +01:00
|
|
|
group.Go(func() error {
|
|
|
|
return cache.Refresh(ctx)
|
|
|
|
})
|
|
|
|
group.Go(func() error {
|
|
|
|
return cache.Refresh(ctx)
|
|
|
|
})
|
|
|
|
err = group.Wait()
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
2022-06-28 12:53:39 +01:00
|
|
|
require.True(t, 1 <= mockDB.callCount && mockDB.callCount <= 2, "calls %d", mockDB.callCount)
|
2020-04-14 21:50:02 +01:00
|
|
|
}
|
|
|
|
|
2020-05-06 18:00:07 +01:00
|
|
|
func TestGetNodes(t *testing.T) {
|
2020-04-14 21:50:02 +01:00
|
|
|
satellitedbtest.Run(t, func(ctx *testcontext.Context, t *testing.T, db satellite.DB) {
|
2020-05-06 18:22:54 +01:00
|
|
|
var nodeSelectionConfig = overlay.NodeSelectionConfig{
|
2020-04-14 21:50:02 +01:00
|
|
|
NewNodeFraction: 0.2,
|
|
|
|
MinimumVersion: "v1.0.0",
|
|
|
|
OnlineWindow: 4 * time.Hour,
|
|
|
|
DistinctIP: true,
|
|
|
|
MinimumDiskSpace: 100 * memory.MiB,
|
|
|
|
}
|
2022-06-28 12:53:39 +01:00
|
|
|
cache, err := overlay.NewUploadSelectionCache(zap.NewNop(),
|
2020-04-14 21:50:02 +01:00
|
|
|
db.OverlayCache(),
|
|
|
|
lowStaleness,
|
2020-05-06 18:22:54 +01:00
|
|
|
nodeSelectionConfig,
|
2023-06-30 11:13:18 +01:00
|
|
|
uploadselection.NodeFilters{},
|
|
|
|
overlay.NewPlacementRules().CreateFilters,
|
2020-04-14 21:50:02 +01:00
|
|
|
)
|
2022-06-28 12:53:39 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
cacheCtx, cacheCancel := context.WithCancel(ctx)
|
|
|
|
defer cacheCancel()
|
|
|
|
ctx.Go(func() error { return cache.Run(cacheCtx) })
|
|
|
|
|
2020-04-14 21:50:02 +01:00
|
|
|
// the cache should have no nodes to start
|
2022-06-28 12:53:39 +01:00
|
|
|
reputable, new, err := cache.Size(ctx)
|
|
|
|
require.NoError(t, err)
|
2020-04-14 21:50:02 +01:00
|
|
|
require.Equal(t, 0, reputable)
|
|
|
|
require.Equal(t, 0, new)
|
|
|
|
|
2020-07-08 15:28:49 +01:00
|
|
|
// add 4 nodes to the database and vet 2
|
2020-04-14 21:50:02 +01:00
|
|
|
const nodeCount = 4
|
2020-07-08 15:28:49 +01:00
|
|
|
nodeIds := addNodesToNodesTable(ctx, t, db.OverlayCache(), nodeCount, 2)
|
|
|
|
require.Len(t, nodeIds, 2)
|
2020-04-14 21:50:02 +01:00
|
|
|
|
|
|
|
// confirm cache.GetNodes returns the correct nodes
|
|
|
|
selectedNodes, err := cache.GetNodes(ctx, overlay.FindStorageNodesRequest{RequestedCount: 2})
|
|
|
|
require.NoError(t, err)
|
2022-06-28 12:53:39 +01:00
|
|
|
reputable, new, err = cache.Size(ctx)
|
|
|
|
require.NoError(t, err)
|
2020-07-08 15:28:49 +01:00
|
|
|
require.Equal(t, 2, new)
|
|
|
|
require.Equal(t, 2, reputable)
|
2020-04-14 21:50:02 +01:00
|
|
|
require.Equal(t, 2, len(selectedNodes))
|
|
|
|
for _, node := range selectedNodes {
|
|
|
|
require.NotEqual(t, node.ID, "")
|
|
|
|
require.NotEqual(t, node.Address.Address, "")
|
|
|
|
require.NotEqual(t, node.LastIPPort, "")
|
|
|
|
require.NotEqual(t, node.LastNet, "")
|
|
|
|
require.NotEqual(t, node.LastNet, "")
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
2020-05-06 18:00:07 +01:00
|
|
|
|
2022-02-25 16:53:24 +00:00
|
|
|
func TestGetNodesExcludeCountryCodes(t *testing.T) {
|
|
|
|
testplanet.Run(t, testplanet.Config{
|
|
|
|
SatelliteCount: 1, StorageNodeCount: 2, UplinkCount: 0,
|
|
|
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
|
|
|
err := planet.Satellites[0].Overlay.Service.TestNodeCountryCode(ctx, planet.StorageNodes[0].ID(), "FR")
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
cache := planet.Satellites[0].Overlay.Service.UploadSelectionCache
|
|
|
|
|
|
|
|
// confirm cache.GetNodes returns the correct nodes
|
|
|
|
selectedNodes, err := cache.GetNodes(ctx, overlay.FindStorageNodesRequest{RequestedCount: 2})
|
|
|
|
// we only expect one node to be returned, even though we requested two, so there will be an error
|
|
|
|
require.Error(t, err)
|
|
|
|
|
2022-06-28 12:53:39 +01:00
|
|
|
_, new, err := cache.Size(ctx)
|
|
|
|
require.NoError(t, err)
|
2022-02-25 16:53:24 +00:00
|
|
|
require.Equal(t, 2, new)
|
|
|
|
require.Equal(t, 1, len(selectedNodes))
|
|
|
|
// the node that was returned should be the one that does not have the "FR" country code
|
|
|
|
require.Equal(t, planet.StorageNodes[1].ID(), selectedNodes[0].ID)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2020-05-06 18:00:07 +01:00
|
|
|
func TestGetNodesConcurrent(t *testing.T) {
|
2020-04-14 21:50:02 +01:00
|
|
|
ctx := testcontext.New(t)
|
|
|
|
defer ctx.Cleanup()
|
|
|
|
|
2023-06-30 11:35:07 +01:00
|
|
|
reputableNodes := []*uploadselection.SelectedNode{{
|
2020-05-06 18:00:07 +01:00
|
|
|
ID: storj.NodeID{1},
|
|
|
|
Address: &pb.NodeAddress{Address: "127.0.0.9"},
|
|
|
|
LastNet: "127.0.0",
|
|
|
|
LastIPPort: "127.0.0.9:8000",
|
|
|
|
}}
|
2023-06-30 11:35:07 +01:00
|
|
|
newNodes := []*uploadselection.SelectedNode{{
|
2020-05-06 18:00:07 +01:00
|
|
|
ID: storj.NodeID{1},
|
|
|
|
Address: &pb.NodeAddress{Address: "127.0.0.10"},
|
|
|
|
LastNet: "127.0.0",
|
|
|
|
LastIPPort: "127.0.0.10:8000",
|
|
|
|
}}
|
|
|
|
|
2020-04-14 21:50:02 +01:00
|
|
|
// concurrent GetNodes with high staleness, where high staleness means the
|
|
|
|
// cache should only be refreshed the first time we call cache.GetNodes
|
2020-05-06 18:00:07 +01:00
|
|
|
mockDB := mockdb{
|
|
|
|
reputable: reputableNodes,
|
|
|
|
new: newNodes,
|
|
|
|
}
|
2022-06-28 12:53:39 +01:00
|
|
|
cache, err := overlay.NewUploadSelectionCache(zap.NewNop(),
|
2020-04-14 21:50:02 +01:00
|
|
|
&mockDB,
|
|
|
|
highStaleness,
|
2020-05-06 18:22:54 +01:00
|
|
|
nodeSelectionConfig,
|
2023-06-30 11:13:18 +01:00
|
|
|
uploadselection.NodeFilters{},
|
|
|
|
overlay.NewPlacementRules().CreateFilters,
|
2020-04-14 21:50:02 +01:00
|
|
|
)
|
2022-06-28 12:53:39 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
cacheCtx, cacheCancel := context.WithCancel(ctx)
|
|
|
|
defer cacheCancel()
|
|
|
|
ctx.Go(func() error { return cache.Run(cacheCtx) })
|
2020-04-14 21:50:02 +01:00
|
|
|
|
|
|
|
var group errgroup.Group
|
|
|
|
group.Go(func() error {
|
2020-05-06 18:00:07 +01:00
|
|
|
nodes, err := cache.GetNodes(ctx, overlay.FindStorageNodesRequest{
|
2020-05-07 12:54:48 +01:00
|
|
|
RequestedCount: 1,
|
2020-05-06 18:00:07 +01:00
|
|
|
})
|
|
|
|
for i := range nodes {
|
|
|
|
nodes[i].ID = storj.NodeID{byte(i)}
|
|
|
|
nodes[i].Address.Address = "123.123.123.123"
|
|
|
|
}
|
|
|
|
nodes[0] = nil
|
2020-04-14 21:50:02 +01:00
|
|
|
return err
|
|
|
|
})
|
|
|
|
group.Go(func() error {
|
2020-05-06 18:00:07 +01:00
|
|
|
nodes, err := cache.GetNodes(ctx, overlay.FindStorageNodesRequest{
|
2020-05-07 12:54:48 +01:00
|
|
|
RequestedCount: 1,
|
2020-05-06 18:00:07 +01:00
|
|
|
})
|
|
|
|
for i := range nodes {
|
|
|
|
nodes[i].ID = storj.NodeID{byte(i)}
|
|
|
|
nodes[i].Address.Address = "123.123.123.123"
|
|
|
|
}
|
|
|
|
nodes[0] = nil
|
2020-04-14 21:50:02 +01:00
|
|
|
return err
|
|
|
|
})
|
2022-06-28 12:53:39 +01:00
|
|
|
|
|
|
|
require.NoError(t, group.Wait())
|
2020-04-14 21:50:02 +01:00
|
|
|
// expect only one call to the db via cache.GetNodes
|
|
|
|
require.Equal(t, 1, mockDB.callCount)
|
|
|
|
|
|
|
|
// concurrent get nodes with low staleness, where low staleness means that
|
|
|
|
// the cache will refresh each time cache.GetNodes is called
|
2020-05-06 18:00:07 +01:00
|
|
|
mockDB = mockdb{
|
|
|
|
reputable: reputableNodes,
|
|
|
|
new: newNodes,
|
|
|
|
}
|
2022-06-28 12:53:39 +01:00
|
|
|
cache, err = overlay.NewUploadSelectionCache(zap.NewNop(),
|
2020-04-14 21:50:02 +01:00
|
|
|
&mockDB,
|
|
|
|
lowStaleness,
|
2020-05-06 18:22:54 +01:00
|
|
|
nodeSelectionConfig,
|
2023-06-30 11:13:18 +01:00
|
|
|
uploadselection.NodeFilters{},
|
|
|
|
overlay.NewPlacementRules().CreateFilters,
|
2020-04-14 21:50:02 +01:00
|
|
|
)
|
2022-06-28 12:53:39 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
ctx.Go(func() error { return cache.Run(cacheCtx) })
|
2020-04-14 21:50:02 +01:00
|
|
|
|
|
|
|
group.Go(func() error {
|
2020-05-06 18:00:07 +01:00
|
|
|
nodes, err := cache.GetNodes(ctx, overlay.FindStorageNodesRequest{
|
2020-05-07 12:54:48 +01:00
|
|
|
RequestedCount: 1,
|
2020-05-06 18:00:07 +01:00
|
|
|
})
|
|
|
|
for i := range nodes {
|
|
|
|
nodes[i].ID = storj.NodeID{byte(i)}
|
|
|
|
nodes[i].Address.Address = "123.123.123.123"
|
|
|
|
}
|
|
|
|
nodes[0] = nil
|
2020-04-14 21:50:02 +01:00
|
|
|
return err
|
|
|
|
})
|
|
|
|
group.Go(func() error {
|
2020-05-06 18:00:07 +01:00
|
|
|
nodes, err := cache.GetNodes(ctx, overlay.FindStorageNodesRequest{
|
2020-05-07 12:54:48 +01:00
|
|
|
RequestedCount: 1,
|
2020-05-06 18:00:07 +01:00
|
|
|
})
|
|
|
|
for i := range nodes {
|
|
|
|
nodes[i].ID = storj.NodeID{byte(i)}
|
|
|
|
nodes[i].Address.Address = "123.123.123.123"
|
|
|
|
}
|
|
|
|
nodes[0] = nil
|
2020-04-14 21:50:02 +01:00
|
|
|
return err
|
|
|
|
})
|
|
|
|
err = group.Wait()
|
|
|
|
require.NoError(t, err)
|
2022-06-28 12:53:39 +01:00
|
|
|
// expect up to two calls to the db via cache.GetNodes
|
|
|
|
require.True(t, 1 <= mockDB.callCount && mockDB.callCount <= 2, "calls %d", mockDB.callCount)
|
2020-04-14 21:50:02 +01:00
|
|
|
}
|
|
|
|
|
2020-05-06 18:22:54 +01:00
|
|
|
func TestGetNodesDistinct(t *testing.T) {
|
|
|
|
ctx := testcontext.New(t)
|
|
|
|
defer ctx.Cleanup()
|
|
|
|
|
2023-06-30 11:35:07 +01:00
|
|
|
reputableNodes := []*uploadselection.SelectedNode{{
|
2020-05-06 18:22:54 +01:00
|
|
|
ID: testrand.NodeID(),
|
|
|
|
Address: &pb.NodeAddress{Address: "127.0.0.9"},
|
|
|
|
LastNet: "127.0.0",
|
|
|
|
LastIPPort: "127.0.0.9:8000",
|
|
|
|
}, {
|
|
|
|
ID: testrand.NodeID(),
|
|
|
|
Address: &pb.NodeAddress{Address: "127.0.0.6"},
|
|
|
|
LastNet: "127.0.0",
|
|
|
|
LastIPPort: "127.0.0.6:8000",
|
|
|
|
}, {
|
|
|
|
ID: testrand.NodeID(),
|
|
|
|
Address: &pb.NodeAddress{Address: "127.0.1.7"},
|
|
|
|
LastNet: "127.0.1",
|
|
|
|
LastIPPort: "127.0.1.7:8000",
|
|
|
|
}, {
|
|
|
|
ID: testrand.NodeID(),
|
|
|
|
Address: &pb.NodeAddress{Address: "127.0.2.7"},
|
|
|
|
LastNet: "127.0.2",
|
|
|
|
LastIPPort: "127.0.2.7:8000",
|
|
|
|
}}
|
|
|
|
|
2023-06-30 11:35:07 +01:00
|
|
|
newNodes := []*uploadselection.SelectedNode{{
|
2020-05-06 18:22:54 +01:00
|
|
|
ID: testrand.NodeID(),
|
|
|
|
Address: &pb.NodeAddress{Address: "127.0.0.10"},
|
|
|
|
LastNet: "127.0.0",
|
|
|
|
LastIPPort: "127.0.0.10:8000",
|
|
|
|
}, {
|
|
|
|
ID: testrand.NodeID(),
|
|
|
|
Address: &pb.NodeAddress{Address: "127.0.1.8"},
|
|
|
|
LastNet: "127.0.1",
|
|
|
|
LastIPPort: "127.0.1.8:8000",
|
|
|
|
}, {
|
|
|
|
ID: testrand.NodeID(),
|
|
|
|
Address: &pb.NodeAddress{Address: "127.0.2.8"},
|
|
|
|
LastNet: "127.0.2",
|
|
|
|
LastIPPort: "127.0.2.8:8000",
|
|
|
|
}}
|
|
|
|
|
|
|
|
mockDB := mockdb{
|
|
|
|
reputable: reputableNodes,
|
|
|
|
new: newNodes,
|
|
|
|
}
|
|
|
|
|
|
|
|
{
|
|
|
|
// test that distinct ip doesn't return same last net
|
|
|
|
config := nodeSelectionConfig
|
|
|
|
config.NewNodeFraction = 0.5
|
|
|
|
config.DistinctIP = true
|
2022-06-28 12:53:39 +01:00
|
|
|
cache, err := overlay.NewUploadSelectionCache(zap.NewNop(),
|
2020-05-06 18:22:54 +01:00
|
|
|
&mockDB,
|
|
|
|
highStaleness,
|
|
|
|
config,
|
2023-06-30 11:13:18 +01:00
|
|
|
uploadselection.NodeFilters{}.WithAutoExcludeSubnets(),
|
|
|
|
overlay.NewPlacementRules().CreateFilters,
|
2020-05-06 18:22:54 +01:00
|
|
|
)
|
2022-06-28 12:53:39 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
cacheCtx, cacheCancel := context.WithCancel(ctx)
|
|
|
|
defer cacheCancel()
|
|
|
|
ctx.Go(func() error { return cache.Run(cacheCtx) })
|
2020-05-06 18:22:54 +01:00
|
|
|
|
|
|
|
// selecting 3 should be possible
|
|
|
|
nodes, err := cache.GetNodes(ctx, overlay.FindStorageNodesRequest{
|
2020-05-07 12:54:48 +01:00
|
|
|
RequestedCount: 3,
|
2020-05-06 18:22:54 +01:00
|
|
|
})
|
|
|
|
require.NoError(t, err)
|
|
|
|
seen := make(map[string]bool)
|
|
|
|
for _, n := range nodes {
|
|
|
|
require.False(t, seen[n.LastNet])
|
|
|
|
seen[n.LastNet] = true
|
|
|
|
}
|
|
|
|
|
|
|
|
// selecting 6 is impossible
|
|
|
|
_, err = cache.GetNodes(ctx, overlay.FindStorageNodesRequest{
|
2020-05-07 12:54:48 +01:00
|
|
|
RequestedCount: 6,
|
2020-05-06 18:22:54 +01:00
|
|
|
})
|
|
|
|
require.Error(t, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
{ // test that distinctIP=true allows selecting 6 nodes
|
satellite/overlay: configurable meaning of last_net
Up to now, we have been implementing the DistinctIP preference with code
in two places:
1. On check-in, the last_net is determined by taking the /24 or /64
(in ResolveIPAndNetwork()) and we store it with the node record.
2. On node selection, a preference parameter defines whether to return
results that are distinct on last_net.
It can be observed that we have never yet had the need to switch from
DistinctIP to !DistinctIP, or from !DistinctIP to DistinctIP, on the
same satellite, and we will probably never need to do so in an automated
way. It can also be observed that this arrangement makes tests more
complicated, because we often have to arrange for test nodes to have IP
addresses in different /24 networks (a particular pain on macOS).
Those two considerations, plus some pending work on the repair framework
that will make repair take last_net into consideration, motivate this
change.
With this change, in the #2 place, we will _always_ return results that
are distinct on last_net. We implement the DistinctIP preference, then,
by making the #1 place (ResolveIPAndNetwork()) more flexible. When
DistinctIP is enabled, last_net will be calculated as it was before. But
when DistinctIP is _off_, last_net can be the same as address (IP and
port). That will effectively implement !DistinctIP because every
record will have a distinct last_net already.
As a side effect, this flexibility will allow us to change the rules
about last_net construction arbitrarily. We can do tests where last_net
is set to the source IP, or to a /30 prefix, or a /16 prefix, etc., and
be able to exercise the production logic without requiring a virtual
network bridge.
This change should be safe to make without any migration code, because
all known production satellite deployments use DistinctIP, and the
associated last_net values will not change for them. They will only
change for satellites with !DistinctIP, which are mostly test
deployments that can be recreated trivially. For those satellites which
are both permanent and !DistinctIP, node selection will suddenly start
acting as though DistinctIP is enabled, until the operator runs a single
SQL update "UPDATE nodes SET last_net = last_ip_port". That can be done
either before or after deploying software with this change.
I also assert that this will not hurt performance for production
deployments. It's true that adding the distinct requirement to node
selection makes things a little slower, but the distinct requirement is
already present for all production deployments, and they will see no
change.
Refs: https://github.com/storj/storj/issues/5391
Change-Id: I0e7e92498c3da768df5b4d5fb213dcd2d4862924
2023-02-28 22:57:39 +00:00
|
|
|
// emulate DistinctIP=false behavior by filling in LastNets with unique addresses
|
2023-06-30 11:35:07 +01:00
|
|
|
for _, nodeList := range [][]*uploadselection.SelectedNode{reputableNodes, newNodes} {
|
satellite/overlay: configurable meaning of last_net
Up to now, we have been implementing the DistinctIP preference with code
in two places:
1. On check-in, the last_net is determined by taking the /24 or /64
(in ResolveIPAndNetwork()) and we store it with the node record.
2. On node selection, a preference parameter defines whether to return
results that are distinct on last_net.
It can be observed that we have never yet had the need to switch from
DistinctIP to !DistinctIP, or from !DistinctIP to DistinctIP, on the
same satellite, and we will probably never need to do so in an automated
way. It can also be observed that this arrangement makes tests more
complicated, because we often have to arrange for test nodes to have IP
addresses in different /24 networks (a particular pain on macOS).
Those two considerations, plus some pending work on the repair framework
that will make repair take last_net into consideration, motivate this
change.
With this change, in the #2 place, we will _always_ return results that
are distinct on last_net. We implement the DistinctIP preference, then,
by making the #1 place (ResolveIPAndNetwork()) more flexible. When
DistinctIP is enabled, last_net will be calculated as it was before. But
when DistinctIP is _off_, last_net can be the same as address (IP and
port). That will effectively implement !DistinctIP because every
record will have a distinct last_net already.
As a side effect, this flexibility will allow us to change the rules
about last_net construction arbitrarily. We can do tests where last_net
is set to the source IP, or to a /30 prefix, or a /16 prefix, etc., and
be able to exercise the production logic without requiring a virtual
network bridge.
This change should be safe to make without any migration code, because
all known production satellite deployments use DistinctIP, and the
associated last_net values will not change for them. They will only
change for satellites with !DistinctIP, which are mostly test
deployments that can be recreated trivially. For those satellites which
are both permanent and !DistinctIP, node selection will suddenly start
acting as though DistinctIP is enabled, until the operator runs a single
SQL update "UPDATE nodes SET last_net = last_ip_port". That can be done
either before or after deploying software with this change.
I also assert that this will not hurt performance for production
deployments. It's true that adding the distinct requirement to node
selection makes things a little slower, but the distinct requirement is
already present for all production deployments, and they will see no
change.
Refs: https://github.com/storj/storj/issues/5391
Change-Id: I0e7e92498c3da768df5b4d5fb213dcd2d4862924
2023-02-28 22:57:39 +00:00
|
|
|
for i := range nodeList {
|
|
|
|
nodeList[i].LastNet = nodeList[i].LastIPPort
|
|
|
|
}
|
|
|
|
}
|
2020-05-06 18:22:54 +01:00
|
|
|
config := nodeSelectionConfig
|
|
|
|
config.NewNodeFraction = 0.5
|
|
|
|
config.DistinctIP = false
|
2022-06-28 12:53:39 +01:00
|
|
|
cache, err := overlay.NewUploadSelectionCache(zap.NewNop(),
|
2020-05-06 18:22:54 +01:00
|
|
|
&mockDB,
|
|
|
|
highStaleness,
|
|
|
|
config,
|
2023-06-30 11:13:18 +01:00
|
|
|
uploadselection.NodeFilters{},
|
|
|
|
overlay.NewPlacementRules().CreateFilters,
|
2020-05-06 18:22:54 +01:00
|
|
|
)
|
2022-06-28 12:53:39 +01:00
|
|
|
require.NoError(t, err)
|
2020-05-06 18:22:54 +01:00
|
|
|
|
2022-06-28 12:53:39 +01:00
|
|
|
cacheCtx, cacheCancel := context.WithCancel(ctx)
|
|
|
|
defer cacheCancel()
|
|
|
|
ctx.Go(func() error { return cache.Run(cacheCtx) })
|
|
|
|
|
|
|
|
_, err = cache.GetNodes(ctx, overlay.FindStorageNodesRequest{
|
2020-05-07 12:54:48 +01:00
|
|
|
RequestedCount: 6,
|
2020-05-06 18:22:54 +01:00
|
|
|
})
|
|
|
|
require.NoError(t, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-06 18:00:07 +01:00
|
|
|
func TestGetNodesError(t *testing.T) {
|
2020-04-14 21:50:02 +01:00
|
|
|
ctx := testcontext.New(t)
|
|
|
|
defer ctx.Cleanup()
|
|
|
|
|
|
|
|
mockDB := mockdb{}
|
2022-06-28 12:53:39 +01:00
|
|
|
cache, err := overlay.NewUploadSelectionCache(zap.NewNop(),
|
2020-04-14 21:50:02 +01:00
|
|
|
&mockDB,
|
|
|
|
highStaleness,
|
2020-05-06 18:22:54 +01:00
|
|
|
nodeSelectionConfig,
|
2023-06-30 11:13:18 +01:00
|
|
|
uploadselection.NodeFilters{},
|
|
|
|
overlay.NewPlacementRules().CreateFilters,
|
2020-04-14 21:50:02 +01:00
|
|
|
)
|
2022-06-28 12:53:39 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
cacheCtx, cacheCancel := context.WithCancel(ctx)
|
|
|
|
defer cacheCancel()
|
|
|
|
ctx.Go(func() error { return cache.Run(cacheCtx) })
|
2020-04-14 21:50:02 +01:00
|
|
|
|
|
|
|
// there should be 0 nodes in the cache
|
2022-06-28 12:53:39 +01:00
|
|
|
reputable, new, err := cache.Size(ctx)
|
|
|
|
require.NoError(t, err)
|
2020-04-14 21:50:02 +01:00
|
|
|
require.Equal(t, 0, reputable)
|
|
|
|
require.Equal(t, 0, new)
|
|
|
|
|
|
|
|
// since the cache has no nodes, we should not be able
|
|
|
|
// to get 2 storage nodes from it and we expect an error
|
2022-06-28 12:53:39 +01:00
|
|
|
_, err = cache.GetNodes(ctx, overlay.FindStorageNodesRequest{RequestedCount: 2})
|
2020-04-14 21:50:02 +01:00
|
|
|
require.Error(t, err)
|
|
|
|
}
|
2020-04-24 17:11:04 +01:00
|
|
|
|
|
|
|
func TestNewNodeFraction(t *testing.T) {
|
|
|
|
satellitedbtest.Run(t, func(ctx *testcontext.Context, t *testing.T, db satellite.DB) {
|
|
|
|
newNodeFraction := 0.2
|
2020-05-06 18:22:54 +01:00
|
|
|
var nodeSelectionConfig = overlay.NodeSelectionConfig{
|
2020-04-24 17:11:04 +01:00
|
|
|
NewNodeFraction: newNodeFraction,
|
|
|
|
MinimumVersion: "v1.0.0",
|
|
|
|
OnlineWindow: 4 * time.Hour,
|
|
|
|
DistinctIP: true,
|
|
|
|
MinimumDiskSpace: 10 * memory.MiB,
|
|
|
|
}
|
2022-06-28 12:53:39 +01:00
|
|
|
cache, err := overlay.NewUploadSelectionCache(zap.NewNop(),
|
2020-04-24 17:11:04 +01:00
|
|
|
db.OverlayCache(),
|
|
|
|
lowStaleness,
|
2020-05-06 18:22:54 +01:00
|
|
|
nodeSelectionConfig,
|
2023-06-30 11:13:18 +01:00
|
|
|
uploadselection.NodeFilters{},
|
|
|
|
overlay.NewPlacementRules().CreateFilters,
|
2020-04-24 17:11:04 +01:00
|
|
|
)
|
2022-06-28 12:53:39 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
cacheCtx, cacheCancel := context.WithCancel(ctx)
|
|
|
|
defer cacheCancel()
|
|
|
|
ctx.Go(func() error { return cache.Run(cacheCtx) })
|
|
|
|
|
2020-04-24 17:11:04 +01:00
|
|
|
// the cache should have no nodes to start
|
2022-06-28 12:53:39 +01:00
|
|
|
err = cache.Refresh(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
reputable, new, err := cache.Size(ctx)
|
2020-04-24 17:11:04 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, 0, reputable)
|
|
|
|
require.Equal(t, 0, new)
|
|
|
|
|
|
|
|
// add some nodes to the database, some are reputable and some are new nodes
|
|
|
|
const nodeCount = 10
|
2020-07-08 15:28:49 +01:00
|
|
|
repIDs := addNodesToNodesTable(ctx, t, db.OverlayCache(), nodeCount, 4)
|
|
|
|
require.Len(t, repIDs, 4)
|
2020-04-24 17:11:04 +01:00
|
|
|
// confirm nodes are in the cache once
|
|
|
|
err = cache.Refresh(ctx)
|
|
|
|
require.NoError(t, err)
|
2022-06-28 12:53:39 +01:00
|
|
|
reputable, new, err = cache.Size(ctx)
|
|
|
|
require.NoError(t, err)
|
2020-04-24 17:11:04 +01:00
|
|
|
require.Equal(t, 6, new)
|
|
|
|
require.Equal(t, 4, reputable)
|
|
|
|
|
|
|
|
// select nodes and confirm correct new node fraction
|
|
|
|
n, err := cache.GetNodes(ctx, overlay.FindStorageNodesRequest{RequestedCount: 5})
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, len(n), 5)
|
|
|
|
var reputableCount int
|
|
|
|
for _, id := range repIDs {
|
|
|
|
for _, node := range n {
|
|
|
|
if id == node.ID {
|
|
|
|
reputableCount++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-07-08 15:28:49 +01:00
|
|
|
require.Equal(t, len(n)-reputableCount, int(5*newNodeFraction)) // 1, 1
|
2020-04-24 17:11:04 +01:00
|
|
|
})
|
|
|
|
}
|
2023-06-30 11:13:18 +01:00
|
|
|
|
|
|
|
func BenchmarkGetNodes(b *testing.B) {
|
|
|
|
newNodes := 2000
|
|
|
|
oldNodes := 18000
|
|
|
|
required := 110
|
|
|
|
if testing.Short() {
|
|
|
|
newNodes = 10
|
|
|
|
oldNodes = 50
|
|
|
|
required = 2
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx, cancel := context.WithCancel(testcontext.New(b))
|
|
|
|
defer cancel()
|
|
|
|
log, err := zap.NewDevelopment()
|
|
|
|
require.NoError(b, err)
|
|
|
|
placement := overlay.NewPlacementRules()
|
|
|
|
placement.AddLegacyStaticRules()
|
|
|
|
defaultFilter := uploadselection.NodeFilters{}
|
|
|
|
|
|
|
|
db := NewMockUploadSelectionDb(
|
|
|
|
generatedSelectedNodes(b, oldNodes),
|
|
|
|
generatedSelectedNodes(b, newNodes),
|
|
|
|
)
|
|
|
|
cache, err := overlay.NewUploadSelectionCache(log, db, 10*time.Minute, overlay.NodeSelectionConfig{
|
|
|
|
NewNodeFraction: 0.1,
|
|
|
|
}, defaultFilter, placement.CreateFilters)
|
|
|
|
require.NoError(b, err)
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
_ = cache.Run(ctx)
|
|
|
|
}()
|
|
|
|
|
|
|
|
b.ResetTimer()
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
_, err := cache.GetNodes(ctx, overlay.FindStorageNodesRequest{
|
|
|
|
RequestedCount: required,
|
|
|
|
Placement: storj.US,
|
|
|
|
})
|
|
|
|
require.NoError(b, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// MockUploadSelection implements overlay.UploadSelectionDB with a static list.
|
|
|
|
type MockUploadSelectionDB struct {
|
|
|
|
new []*uploadselection.SelectedNode
|
|
|
|
reputable []*uploadselection.SelectedNode
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewMockUploadSelectionDb creates a MockUploadSelectionDB with the given reputable and new nodes.
|
|
|
|
func NewMockUploadSelectionDb(reputable, new []*uploadselection.SelectedNode) *MockUploadSelectionDB {
|
|
|
|
return &MockUploadSelectionDB{
|
|
|
|
new: new,
|
|
|
|
reputable: reputable,
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
// SelectAllStorageNodesUpload implements overlay.UploadSelectionDB.
|
|
|
|
func (m MockUploadSelectionDB) SelectAllStorageNodesUpload(ctx context.Context, selectionCfg overlay.NodeSelectionConfig) (reputable, new []*uploadselection.SelectedNode, err error) {
|
|
|
|
return m.reputable, m.new, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// compile-time check that *MockUploadSelectionDB satisfies overlay.UploadSelectionDB.
var _ overlay.UploadSelectionDB = (*MockUploadSelectionDB)(nil)
|
|
|
|
|
|
|
|
func generatedSelectedNodes(b *testing.B, nodeNo int) []*uploadselection.SelectedNode {
|
|
|
|
nodes := make([]*uploadselection.SelectedNode, nodeNo)
|
|
|
|
ctx := testcontext.New(b)
|
|
|
|
for i := 0; i < nodeNo; i++ {
|
|
|
|
node := uploadselection.SelectedNode{}
|
|
|
|
identity, err := testidentity.NewTestIdentity(ctx)
|
|
|
|
require.NoError(b, err)
|
|
|
|
node.ID = identity.ID
|
|
|
|
|
|
|
|
// with 5% percentage chance, we re-use an existing IP address.
|
|
|
|
if rand.Intn(100) < 5 && i > 0 {
|
|
|
|
prevParts := strings.Split(nodes[rand.Intn(i)].LastIPPort, ":")
|
|
|
|
node.LastIPPort = fmt.Sprintf("%s:%d", prevParts[0], rand.Int31n(10000)+1000)
|
|
|
|
} else {
|
|
|
|
node.LastIPPort = fmt.Sprintf("%d.%d.%d.%d:%d", 10+i/256/256%256, i/256%256, i%256, 1, rand.Int31n(10000)+1000)
|
|
|
|
}
|
|
|
|
|
|
|
|
parts := strings.Split(node.LastIPPort, ".")
|
|
|
|
node.LastNet = fmt.Sprintf("%s.%s.%s.0", parts[0], parts[1], parts[2])
|
|
|
|
node.CountryCode = []location.CountryCode{location.None, location.UnitedStates, location.Germany, location.Hungary, location.Austria}[i%5]
|
|
|
|
nodes[i] = &node
|
|
|
|
}
|
|
|
|
return nodes
|
|
|
|
}
|