satellite/overlay: refactor Reliable to be used with repair checker
Currently we use Reliable to get missing pieces for the repair checker. The issue is that the checker now looks at more than just missing pieces (clumped and off-placement pieces), and using only node IDs is not enough. We also have an issue where offline nodes are skipped in the clumped and off-placement pieces checks. Reliable was refactored to return data (e.g. country, lastNet) about all reliable nodes, with the list split into online and offline nodes. This data will be cached for quick use by the repair checker, and it will also be possible to check node metadata like country code or lastNet. We are also slowly moving the `RepairExcludedCountryCodes` config from overlay to repair, where it makes more sense. This is the first part of the changes.

https://github.com/storj/storj/issues/5998

Change-Id: If534342488c0e440affc2894a8fbda6507b8959d
commit f2cd7b0928
parent 500b6244f8
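For orientation, the sketch below shows the shape of the change from the checker's point of view: Reliable now returns separate online and offline slices of node records instead of a flat node ID list, and the reliability cache drops online nodes whose country code is in the excluded set. The types and helper names here (SelectedNode, buildReliableSet) are simplified stand-ins for illustration, not the actual satellite packages.

    package main

    import "fmt"

    // SelectedNode is a simplified stand-in for the overlay's node record,
    // carrying the metadata the checker now needs (country code, lastNet, ...).
    type SelectedNode struct {
        ID          string
        CountryCode string
        LastNet     string
    }

    // buildReliableSet mimics what the reliability cache does on refresh:
    // only online nodes outside the excluded countries end up in the
    // "reliable" set used by the repair checker.
    func buildReliableSet(online []SelectedNode, excluded map[string]struct{}) map[string]struct{} {
        reliable := make(map[string]struct{}, len(online))
        for _, node := range online {
            if _, ok := excluded[node.CountryCode]; !ok {
                reliable[node.ID] = struct{}{}
            }
        }
        return reliable
    }

    func main() {
        // Pretend these came back as the online slice from overlay's Reliable.
        online := []SelectedNode{
            {ID: "node-1", CountryCode: "DE"},
            {ID: "node-2", CountryCode: "FR"}, // excluded for repair
        }
        excluded := map[string]struct{}{"FR": {}}

        fmt.Println(buildReliableSet(online, excluded)) // map[node-1:{}]
    }

In the real code the node records come from storj.io/storj/satellite/nodeselection/uploadselection and the filtering happens in the checker's ReliabilityCache, as shown in the diff below.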
@@ -105,9 +105,9 @@ func TestDownloadWithSomeNodesOffline(t *testing.T) {
     }

     // confirm that we marked the correct number of storage nodes as offline
-    nodes, err := satellite.Overlay.Service.Reliable(ctx)
+    online, _, err := satellite.Overlay.Service.Reliable(ctx)
     require.NoError(t, err)
-    require.Len(t, nodes, len(planet.StorageNodes)-toKill)
+    require.Len(t, online, len(planet.StorageNodes)-toKill)

     // we should be able to download data without any of the original nodes
     newData, err := ul.Download(ctx, satellite, "testbucket", "test/path")
@@ -426,6 +426,7 @@ func TestAllInOne(t *testing.T) {
         satellite.DB.RepairQueue(),
         satellite.Overlay.Service,
         satellite.Config.Checker,
+        []string{},
     ),
 })

@@ -66,8 +66,8 @@ type DB interface {
     KnownReliableInExcludedCountries(context.Context, *NodeCriteria, storj.NodeIDList) (storj.NodeIDList, error)
     // KnownReliable filters a set of nodes to reliable (online and qualified) nodes.
     KnownReliable(ctx context.Context, nodeIDs storj.NodeIDList, onlineWindow, asOfSystemInterval time.Duration) (online []uploadselection.SelectedNode, offline []uploadselection.SelectedNode, err error)
-    // Reliable returns all nodes that are reliable
-    Reliable(context.Context, *NodeCriteria) (storj.NodeIDList, error)
+    // Reliable returns all nodes that are reliable, online and offline.
+    Reliable(ctx context.Context, onlineWindow, asOfSystemInterval time.Duration) (online []uploadselection.SelectedNode, offline []uploadselection.SelectedNode, err error)
     // UpdateReputation updates the DB columns for all reputation fields in ReputationStatus.
     UpdateReputation(ctx context.Context, id storj.NodeID, request ReputationUpdate) error
     // UpdateNodeInfo updates node dossier with info requested from the node itself like node type, email, wallet, capacity, and version.
@@ -577,15 +577,12 @@ func (service *Service) KnownReliable(ctx context.Context, nodeIDs storj.NodeIDL
     return service.db.KnownReliable(ctx, nodeIDs, service.config.Node.OnlineWindow, 0)
 }

-// Reliable filters a set of nodes that are reliable, independent of new.
-func (service *Service) Reliable(ctx context.Context) (nodes storj.NodeIDList, err error) {
+// Reliable returns all nodes that are reliable, online and offline.
+func (service *Service) Reliable(ctx context.Context) (online []uploadselection.SelectedNode, offline []uploadselection.SelectedNode, err error) {
     defer mon.Task()(&ctx)(&err)

-    criteria := &NodeCriteria{
-        OnlineWindow: service.config.Node.OnlineWindow,
-    }
-    criteria.ExcludedCountries = service.config.RepairExcludedCountryCodes
-    return service.db.Reliable(ctx, criteria)
+    // TODO add as of system time
+    return service.db.Reliable(ctx, service.config.Node.OnlineWindow, 0)
 }

 // UpdateReputation updates the DB columns for any of the reputation fields.
@@ -818,28 +818,6 @@ func TestVetAndUnvetNode(t *testing.T) {
     })
 }

-func TestReliable(t *testing.T) {
-    testplanet.Run(t, testplanet.Config{
-        SatelliteCount: 1, StorageNodeCount: 2, UplinkCount: 0,
-    }, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
-        service := planet.Satellites[0].Overlay.Service
-        node := planet.StorageNodes[0]
-
-        nodes, err := service.Reliable(ctx)
-        require.NoError(t, err)
-        require.Len(t, nodes, 2)
-
-        err = planet.Satellites[0].Overlay.Service.TestNodeCountryCode(ctx, node.ID(), "FR")
-        require.NoError(t, err)
-
-        // first node should be excluded from Reliable result because of country code
-        nodes, err = service.Reliable(ctx)
-        require.NoError(t, err)
-        require.Len(t, nodes, 1)
-        require.NotEqual(t, node.ID(), nodes[0])
-    })
-}
-
 func TestKnownReliableInExcludedCountries(t *testing.T) {
     testplanet.Run(t, testplanet.Config{
         SatelliteCount: 1, StorageNodeCount: 2, UplinkCount: 0,
@@ -847,15 +825,15 @@ func TestKnownReliableInExcludedCountries(t *testing.T) {
         service := planet.Satellites[0].Overlay.Service
         node := planet.StorageNodes[0]

-        nodes, err := service.Reliable(ctx)
+        onlineNodes, _, err := service.Reliable(ctx)
         require.NoError(t, err)
-        require.Len(t, nodes, 2)
+        require.Len(t, onlineNodes, 2)

         err = planet.Satellites[0].Overlay.Service.TestNodeCountryCode(ctx, node.ID(), "FR")
         require.NoError(t, err)

         // first node should be excluded from Reliable result because of country code
-        nodes, err = service.KnownReliableInExcludedCountries(ctx, nodes)
+        nodes, err := service.KnownReliableInExcludedCountries(ctx, storj.NodeIDList{onlineNodes[0].ID, onlineNodes[1].ID})
         require.NoError(t, err)
         require.Len(t, nodes, 1)
         require.Equal(t, node.ID(), nodes[0])
@@ -102,18 +102,13 @@ func testDatabase(ctx context.Context, t *testing.T, cache overlay.DB) {
         storj.NodeID{7}, storj.NodeID{8},
         storj.NodeID{9},
     }
-    criteria := &overlay.NodeCriteria{
-        OnlineWindow:      time.Hour,
-        ExcludedCountries: []string{"FR", "BE"},
-    }

     contains := func(nodeID storj.NodeID) func(node uploadselection.SelectedNode) bool {
         return func(node uploadselection.SelectedNode) bool {
             return node.ID == nodeID
         }
     }

-    online, offline, err := cache.KnownReliable(ctx, nodeIds, criteria.OnlineWindow, criteria.AsOfSystemInterval)
+    online, offline, err := cache.KnownReliable(ctx, nodeIds, time.Hour, 0)
     require.NoError(t, err)

     // unrealiable nodes shouldn't be in results
@@ -124,19 +119,26 @@ func testDatabase(ctx context.Context, t *testing.T, cache overlay.DB) {
     require.False(t, slices.ContainsFunc(append(online, offline...), contains(storj.NodeID{9}))) // not in db

     require.True(t, slices.ContainsFunc(offline, contains(storj.NodeID{4}))) // offline
+    // KnownReliable is not excluding by country anymore
+    require.True(t, slices.ContainsFunc(online, contains(storj.NodeID{7}))) // excluded country

     require.Len(t, append(online, offline...), 4)

-    valid, err := cache.Reliable(ctx, criteria)
+    online, offline, err = cache.Reliable(ctx, time.Hour, 0)
     require.NoError(t, err)

-    require.NotContains(t, valid, storj.NodeID{2}) // disqualified
-    require.NotContains(t, valid, storj.NodeID{3}) // unknown audit suspended
-    require.NotContains(t, valid, storj.NodeID{4}) // offline
-    require.NotContains(t, valid, storj.NodeID{5}) // gracefully exited
-    require.NotContains(t, valid, storj.NodeID{6}) // offline suspended
-    require.NotContains(t, valid, storj.NodeID{7}) // excluded country
-    require.NotContains(t, valid, storj.NodeID{9}) // not in db
-    require.Len(t, valid, 2)
+    require.False(t, slices.ContainsFunc(append(online, offline...), contains(storj.NodeID{2}))) // disqualified
+    require.False(t, slices.ContainsFunc(append(online, offline...), contains(storj.NodeID{3}))) // unknown audit suspended
+    require.False(t, slices.ContainsFunc(append(online, offline...), contains(storj.NodeID{5}))) // gracefully exited
+    require.False(t, slices.ContainsFunc(append(online, offline...), contains(storj.NodeID{6}))) // offline suspended
+    require.False(t, slices.ContainsFunc(append(online, offline...), contains(storj.NodeID{9}))) // not in db
+
+    require.True(t, slices.ContainsFunc(offline, contains(storj.NodeID{4}))) // offline
+    // Reliable is not excluding by country anymore
+    require.True(t, slices.ContainsFunc(online, contains(storj.NodeID{7}))) // excluded country
+
+    require.Len(t, append(online, offline...), 4)
 }

 { // TestUpdateOperator
@@ -156,6 +156,7 @@ func NewRangedLoop(log *zap.Logger, db DB, metabaseDB *metabase.DB, config *Conf
         peer.DB.RepairQueue(),
         peer.Overlay.Service,
         config.Checker,
+        config.Overlay.RepairExcludedCountryCodes,
     )
 }

@@ -51,12 +51,13 @@ type Observer struct {
 }

 // NewObserver creates new checker observer instance.
-func NewObserver(logger *zap.Logger, repairQueue queue.RepairQueue, overlay *overlay.Service, config Config) *Observer {
+// TODO move excludedCountries into config but share it somehow with segment repairer.
+func NewObserver(logger *zap.Logger, repairQueue queue.RepairQueue, overlay *overlay.Service, config Config, excludedCountries []string) *Observer {
     return &Observer{
         logger: logger,

         repairQueue:     repairQueue,
-        nodestate:       NewReliabilityCache(overlay, config.ReliabilityCacheStaleness),
+        nodestate:       NewReliabilityCache(overlay, config.ReliabilityCacheStaleness, excludedCountries),
         overlayService:  overlay,
         repairOverrides: config.RepairOverrides.GetMap(),
         nodeFailureRate: config.NodeFailureRate,
@@ -554,7 +554,7 @@ func BenchmarkRemoteSegment(b *testing.B) {
     }

     observer := checker.NewObserver(zap.NewNop(), planet.Satellites[0].DB.RepairQueue(),
-        planet.Satellites[0].Auditor.Overlay, planet.Satellites[0].Config.Checker)
+        planet.Satellites[0].Auditor.Overlay, planet.Satellites[0].Config.Checker, []string{})
     segments, err := planet.Satellites[0].Metabase.DB.TestingAllSegments(ctx)
     require.NoError(b, err)

@@ -10,6 +10,7 @@ import (
     "time"

     "storj.io/common/storj"
+    "storj.io/common/storj/location"
     "storj.io/storj/satellite/metabase"
     "storj.io/storj/satellite/overlay"
 )
@@ -19,10 +20,11 @@ import (
 //
 // architecture: Service
 type ReliabilityCache struct {
     overlay   *overlay.Service
     staleness time.Duration
-    mu        sync.Mutex
-    state     atomic.Value // contains immutable *reliabilityState
+    excludedCountryCodes map[location.CountryCode]struct{}
+    mu                   sync.Mutex
+    state                atomic.Value // contains immutable *reliabilityState
 }

 // reliabilityState.
@@ -32,10 +34,18 @@ type reliabilityState struct {
 }

 // NewReliabilityCache creates a new reliability checking cache.
-func NewReliabilityCache(overlay *overlay.Service, staleness time.Duration) *ReliabilityCache {
+func NewReliabilityCache(overlay *overlay.Service, staleness time.Duration, excludedCountries []string) *ReliabilityCache {
+    excludedCountryCodes := make(map[location.CountryCode]struct{})
+    for _, countryCode := range excludedCountries {
+        if cc := location.ToCountryCode(countryCode); cc != location.None {
+            excludedCountryCodes[location.ToCountryCode(countryCode)] = struct{}{}
+        }
+    }
+
     return &ReliabilityCache{
         overlay:   overlay,
         staleness: staleness,
+        excludedCountryCodes: excludedCountryCodes,
     }
 }

@@ -114,17 +124,19 @@ func (cache *ReliabilityCache) Refresh(ctx context.Context) (err error) {
 func (cache *ReliabilityCache) refreshLocked(ctx context.Context) (_ *reliabilityState, err error) {
     defer mon.Task()(&ctx)(&err)

-    nodes, err := cache.overlay.Reliable(ctx)
+    online, _, err := cache.overlay.Reliable(ctx)
     if err != nil {
         return nil, Error.Wrap(err)
     }

     state := &reliabilityState{
         created:  time.Now(),
-        reliable: make(map[storj.NodeID]struct{}, len(nodes)),
+        reliable: make(map[storj.NodeID]struct{}, len(online)),
     }
-    for _, id := range nodes {
-        state.reliable[id] = struct{}{}
+    for _, node := range online {
+        if _, ok := cache.excludedCountryCodes[node.CountryCode]; !ok {
+            state.reliable[node.ID] = struct{}{}
+        }
     }

     cache.state.Store(state)
@@ -12,11 +12,11 @@ import (
     "go.uber.org/zap"
     "golang.org/x/sync/errgroup"

-    "storj.io/common/storj"
     "storj.io/common/testcontext"
     "storj.io/common/testrand"
     "storj.io/storj/satellite/metabase"
     "storj.io/storj/satellite/nodeevents"
+    "storj.io/storj/satellite/nodeselection/uploadselection"
     "storj.io/storj/satellite/overlay"
 )

@@ -35,7 +35,7 @@ func TestReliabilityCache_Concurrent(t *testing.T) {
     ctx.Go(func() error { return overlayCache.Run(cacheCtx) })
     defer ctx.Check(overlayCache.Close)

-    cache := NewReliabilityCache(overlayCache, time.Millisecond)
+    cache := NewReliabilityCache(overlayCache, time.Millisecond, []string{})
     var group errgroup.Group
     for i := 0; i < 10; i++ {
         group.Go(func() error {
@@ -55,11 +55,11 @@ func TestReliabilityCache_Concurrent(t *testing.T) {
 type fakeOverlayDB struct{ overlay.DB }
 type fakeNodeEvents struct{ nodeevents.DB }

-func (fakeOverlayDB) Reliable(context.Context, *overlay.NodeCriteria) (storj.NodeIDList, error) {
-    return storj.NodeIDList{
-        testrand.NodeID(),
-        testrand.NodeID(),
-        testrand.NodeID(),
-        testrand.NodeID(),
-    }, nil
+func (fakeOverlayDB) Reliable(context.Context, time.Duration, time.Duration) ([]uploadselection.SelectedNode, []uploadselection.SelectedNode, error) {
+    return []uploadselection.SelectedNode{
+        {ID: testrand.NodeID()},
+        {ID: testrand.NodeID()},
+        {ID: testrand.NodeID()},
+        {ID: testrand.NodeID()},
+    }, nil, nil
 }
@@ -483,19 +483,11 @@ func (cache *overlaycache) knownReliable(ctx context.Context, nodeIDs storj.Node
     `, pgutil.NodeIDArray(nodeIDs), time.Now().Add(-onlineWindow),
     ))(func(rows tagsql.Rows) error {
         for rows.Next() {
-            var onlineNode bool
-            var node uploadselection.SelectedNode
-            node.Address = &pb.NodeAddress{}
-            var lastIPPort sql.NullString
-            err = rows.Scan(&node.ID, &node.Address.Address, &node.LastNet, &lastIPPort, &node.CountryCode, &onlineNode)
+            node, onlineNode, err := scanSelectedNode(rows)
             if err != nil {
                 return err
             }

-            if lastIPPort.Valid {
-                node.LastIPPort = lastIPPort.String
-            }
-
             if onlineNode {
                 online = append(online, node)
             } else {
@@ -508,63 +500,67 @@ func (cache *overlaycache) knownReliable(ctx context.Context, nodeIDs storj.Node
     return online, offline, Error.Wrap(err)
 }

-// Reliable returns all reliable nodes.
-func (cache *overlaycache) Reliable(ctx context.Context, criteria *overlay.NodeCriteria) (nodes storj.NodeIDList, err error) {
+// Reliable returns all nodes that are reliable, online and offline.
+func (cache *overlaycache) Reliable(ctx context.Context, onlineWindow, asOfSystemInterval time.Duration) (online []uploadselection.SelectedNode, offline []uploadselection.SelectedNode, err error) {
     for {
-        nodes, err = cache.reliable(ctx, criteria)
+        online, offline, err = cache.reliable(ctx, onlineWindow, asOfSystemInterval)
         if err != nil {
             if cockroachutil.NeedsRetry(err) {
                 continue
             }
-            return nodes, err
+            return nil, nil, err
         }
         break
     }

-    return nodes, err
+    return online, offline, nil
 }

-func (cache *overlaycache) reliable(ctx context.Context, criteria *overlay.NodeCriteria) (nodes storj.NodeIDList, err error) {
-    args := []interface{}{
-        time.Now().Add(-criteria.OnlineWindow),
-    }
-
-    // When this config is not set, it's a string slice with one empty string. I added some sanity checks to make sure we don't
-    // dereference a nil pointer or index an element that doesn't exist.
-    var excludedCountriesCondition string
-    if criteria.ExcludedCountries != nil && len(criteria.ExcludedCountries) != 0 && criteria.ExcludedCountries[0] != "" {
-        excludedCountriesCondition = "AND country_code NOT IN (SELECT UNNEST($2::TEXT[]))"
-        args = append(args, pgutil.TextArray(criteria.ExcludedCountries))
-    }
-
-    // get reliable and online nodes
-    rows, err := cache.db.Query(ctx, cache.db.Rebind(`
-        SELECT id
+func (cache *overlaycache) reliable(ctx context.Context, onlineWindow, asOfSystemInterval time.Duration) (online []uploadselection.SelectedNode, offline []uploadselection.SelectedNode, err error) {
+    defer mon.Task()(&ctx)(&err)
+
+    err = withRows(cache.db.Query(ctx, `
+        SELECT id, address, last_net, last_ip_port, country_code, last_contact_success > $1 as online
         FROM nodes
-        `+cache.db.impl.AsOfSystemInterval(criteria.AsOfSystemInterval)+`
+        `+cache.db.impl.AsOfSystemInterval(asOfSystemInterval)+`
         WHERE disqualified IS NULL
         AND unknown_audit_suspended IS NULL
         AND offline_suspended IS NULL
         AND exit_finished_at IS NULL
-        AND last_contact_success > $1
-        `+excludedCountriesCondition+`
-    `), args...)
-    if err != nil {
-        return nil, err
-    }
-    defer func() {
-        err = errs.Combine(err, rows.Close())
-    }()
-
-    for rows.Next() {
-        var id storj.NodeID
-        err = rows.Scan(&id)
-        if err != nil {
-            return nil, err
+    `, time.Now().Add(-onlineWindow),
+    ))(func(rows tagsql.Rows) error {
+        for rows.Next() {
+            node, onlineNode, err := scanSelectedNode(rows)
+            if err != nil {
+                return err
+            }
+
+            if onlineNode {
+                online = append(online, node)
+            } else {
+                offline = append(offline, node)
+            }
         }
-        nodes = append(nodes, id)
-    }
+        return nil
+    })

-    return nodes, Error.Wrap(rows.Err())
+    return online, offline, Error.Wrap(err)
+}
+
+func scanSelectedNode(rows tagsql.Rows) (uploadselection.SelectedNode, bool, error) {
+    var onlineNode bool
+    var node uploadselection.SelectedNode
+    node.Address = &pb.NodeAddress{}
+    var lastIPPort sql.NullString
+    err := rows.Scan(&node.ID, &node.Address.Address, &node.LastNet, &lastIPPort, &node.CountryCode, &onlineNode)
+    if err != nil {
+        return uploadselection.SelectedNode{}, false, err
+    }
+
+    if lastIPPort.Valid {
+        node.LastIPPort = lastIPPort.String
+    }
+    return node, onlineNode, nil
 }

 // UpdateReputation updates the DB columns for any of the reputation fields in ReputationUpdate.
@@ -496,8 +496,59 @@ func TestOverlayCache_KnownReliable(t *testing.T) {
             require.ElementsMatch(t, tc.Offline, offline)
         }

+        // test empty id list
         _, _, err := cache.KnownReliable(ctx, storj.NodeIDList{}, 1*time.Hour, 0)
         require.Error(t, err)
+
+        // test as of system time
+        _, _, err = cache.KnownReliable(ctx, ids(allNodes...), 1*time.Hour, -1*time.Microsecond)
+        require.NoError(t, err)
+    })
+}
+
+func TestOverlayCache_Reliable(t *testing.T) {
+    satellitedbtest.Run(t, func(ctx *testcontext.Context, t *testing.T, db satellite.DB) {
+        cache := db.OverlayCache()
+
+        allNodes := []uploadselection.SelectedNode{
+            addNode(ctx, t, cache, "online", "127.0.0.1", true, false, false, false, false),
+            addNode(ctx, t, cache, "offline", "127.0.0.2", false, false, false, false, false),
+            addNode(ctx, t, cache, "disqalified", "127.0.0.3", false, true, false, false, false),
+            addNode(ctx, t, cache, "audit-suspended", "127.0.0.4", false, false, true, false, false),
+            addNode(ctx, t, cache, "offline-suspended", "127.0.0.5", false, false, false, true, false),
+            addNode(ctx, t, cache, "exited", "127.0.0.6", false, false, false, false, true),
+        }
+
+        type testCase struct {
+            OnlineWindow time.Duration
+            Online       []uploadselection.SelectedNode
+            Offline      []uploadselection.SelectedNode
+        }
+
+        for i, tc := range []testCase{
+            {
+                OnlineWindow: 1 * time.Hour,
+                Online:       []uploadselection.SelectedNode{allNodes[0]},
+                Offline:      []uploadselection.SelectedNode{allNodes[1]},
+            },
+            {
+                OnlineWindow: 20 * time.Hour,
+                Online:       []uploadselection.SelectedNode{allNodes[0], allNodes[1]},
+            },
+            {
+                OnlineWindow: 1 * time.Microsecond,
+                Offline:      []uploadselection.SelectedNode{allNodes[0], allNodes[1]},
+            },
+        } {
+            online, offline, err := cache.Reliable(ctx, tc.OnlineWindow, 0)
+            require.NoError(t, err)
+            require.ElementsMatch(t, tc.Online, online, "#%d", i)
+            require.ElementsMatch(t, tc.Offline, offline, "#%d", i)
+        }
+
+        // test as of system time
+        _, _, err := cache.Reliable(ctx, 1*time.Hour, -1*time.Microsecond)
+        require.NoError(t, err)
     })
 }
