satellite/overlay: fix GetNodesNetworkInOrder
We were using the UploadSelectionCache previously, which does _not_ have all nodes, or even all online nodes, in it. Nodes below MinimumVersion, nodes with less than MinimumDiskSpace, nodes suspended for unknown audit errors, and nodes that have started graceful exit were all missing from it, and so ended up with empty last_nets.

Even with all that, I'm kind of surprised how many nodes this involved, but using the upload selection cache was definitely wrong.

This change uses the download selection cache instead, which excludes nodes only when they are disqualified, gracefully exited (completely), or offline.

Change-Id: Iaa07c988aa29c1eb05796ac48a6f19d69f5826c1
parent 9b3488276d
commit c856d45cc0
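For context, the difference between the two caches can be sketched as eligibility predicates. This is an illustrative sketch only: the nodeInfo type, field names, and thresholds are assumptions, not storj's real filter code; it just restates the commit message above.

package sketch

// Illustrative only: this type and these thresholds are assumptions.
type nodeInfo struct {
	Disqualified          bool  // failed too many audits
	ExitInitiated         bool  // graceful exit has started
	ExitFinished          bool  // graceful exit has completed
	Online                bool  // recent successful contact
	SuspendedUnknownAudit bool  // suspended for unknown audit errors
	Version               int   // simplified stand-in for a semantic version
	FreeDiskBytes         int64 // reported free disk space
}

const (
	minimumVersion        = 1         // hypothetical MinimumVersion
	minimumDiskSpaceBytes = 500 << 20 // hypothetical MinimumDiskSpace
)

// uploadEligible: a node must pass every upload-selection filter, so many
// online nodes are absent from the UploadSelectionCache.
func uploadEligible(n nodeInfo) bool {
	return !n.Disqualified && !n.ExitFinished && n.Online &&
		n.Version >= minimumVersion &&
		n.FreeDiskBytes >= minimumDiskSpaceBytes &&
		!n.SuspendedUnknownAudit &&
		!n.ExitInitiated
}

// downloadEligible: the download selection cache drops a node only when it
// is disqualified, fully exited, or offline, which makes it the right
// source for last_net lookups.
func downloadEligible(n nodeInfo) bool {
	return !n.Disqualified && !n.ExitFinished && n.Online
}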
@@ -129,22 +129,6 @@ func (state *State) Select(ctx context.Context, request Request) (_ []*Node, err
 	return selected, nil
 }
 
-// GetNodesNetwork returns the cached network for each given node ID.
-func (state *State) GetNodesNetwork(ctx context.Context, nodeIDs []storj.NodeID) (nets []string) {
-	defer mon.Task()(&ctx)(nil)
-
-	state.mu.RLock()
-	defer state.mu.RUnlock()
-
-	nets = make([]string, len(nodeIDs))
-	for i, nodeID := range nodeIDs {
-		if net, ok := state.netByID[nodeID]; ok {
-			nets[i] = net
-		}
-	}
-	return nets
-}
-
 // Stats returns state information.
 func (state *State) Stats() Stats {
 	state.mu.RLock()
@@ -430,7 +430,19 @@ func (service *Service) IsOnline(node *NodeDossier) bool {
 // requested node is not in the database, an empty string will be returned corresponding
 // to that node's last_net.
 func (service *Service) GetNodesNetworkInOrder(ctx context.Context, nodeIDs []storj.NodeID) (lastNets []string, err error) {
-	return service.UploadSelectionCache.GetNodesNetwork(ctx, nodeIDs)
+	defer mon.Task()(&ctx)(nil)
+
+	nodes, err := service.DownloadSelectionCache.GetNodes(ctx, nodeIDs)
+	if err != nil {
+		return nil, err
+	}
+	lastNets = make([]string, len(nodeIDs))
+	for i, nodeID := range nodeIDs {
+		if selectedNode, ok := nodes[nodeID]; ok {
+			lastNets[i] = selectedNode.LastNet
+		}
+	}
+	return lastNets, nil
 }
 
 // FindStorageNodesForGracefulExit searches the overlay network for nodes that meet the provided requirements for graceful-exit requests.
@@ -10,7 +10,6 @@ import (
 	"go.uber.org/zap"
 
 	"storj.io/common/pb"
-	"storj.io/common/storj"
 	"storj.io/common/sync2"
 	"storj.io/storj/satellite/nodeselection/uploadselection"
 )
@@ -117,15 +116,6 @@ func (cache *UploadSelectionCache) Size(ctx context.Context) (reputableNodeCount
 	return stats.Reputable, stats.New, nil
 }
 
-// GetNodesNetwork returns the cached network for each given node ID.
-func (cache *UploadSelectionCache) GetNodesNetwork(ctx context.Context, nodeIDs []storj.NodeID) (nets []string, err error) {
-	state, err := cache.cache.Get(ctx, time.Now())
-	if err != nil {
-		return nil, Error.Wrap(err)
-	}
-	return state.GetNodesNetwork(ctx, nodeIDs), nil
-}
-
 func convNodesToSelectedNodes(nodes []*uploadselection.Node) (xs []*SelectedNode) {
 	for _, n := range nodes {
 		xs = append(xs, &SelectedNode{
@@ -302,7 +302,7 @@ func TestCleanRepairQueueObserver(t *testing.T) {
 	}
 
 	require.NoError(t, observer.RefreshReliabilityCache(ctx))
-	require.NoError(t, planet.Satellites[0].RangedLoop.Overlay.Service.UploadSelectionCache.Refresh(ctx))
+	require.NoError(t, planet.Satellites[0].RangedLoop.Overlay.Service.DownloadSelectionCache.Refresh(ctx))
 
 	// check that repair queue is empty to avoid false positive
 	count, err := repairQueue.Count(ctx)
@@ -324,7 +324,7 @@ func TestCleanRepairQueueObserver(t *testing.T) {
 	}
 
 	require.NoError(t, observer.RefreshReliabilityCache(ctx))
-	require.NoError(t, planet.Satellites[0].RangedLoop.Overlay.Service.UploadSelectionCache.Refresh(ctx))
+	require.NoError(t, planet.Satellites[0].RangedLoop.Overlay.Service.DownloadSelectionCache.Refresh(ctx))
 
 	// The checker will not insert/update the now healthy segments causing
 	// them to be removed from the queue at the end of the checker iteration
@@ -3285,7 +3285,7 @@ func TestRepairClumpedPieces(t *testing.T) {
 	}
 	err = satellite.DB.OverlayCache().UpdateCheckIn(ctx, checkInInfo, time.Now().UTC(), overlay.NodeSelectionConfig{})
 	require.NoError(t, err)
-	err = satellite.RangedLoop.Overlay.Service.UploadSelectionCache.Refresh(ctx)
+	err = satellite.RangedLoop.Overlay.Service.DownloadSelectionCache.Refresh(ctx)
 	require.NoError(t, err)
 
 	// running repair checker again should put the segment into the repair queue