5ea1602ca5
* init implementation cache Change-Id: Ia54a1943e0707a77189bc5f4a9aaa8339c98d99a * one query to init cache Change-Id: I7c04b3ae104b553ae23fca372351a4328f632c66 * add monit tracking of cache Change-Id: I7d209e12c8f32d43708b23bf2126c5d5098e0a07 * add first test Change-Id: I0646a9349d457a9eb3920f7cd2d62fb72ffc3ab5 * add staleness to cache Change-Id: If002329bfdd53a4b200ad14dbd2ffc8b280aedb8 * add init test Change-Id: I3a3d0aa74cfac1d125fa93cb749316ed2a74d5b1 * fix comment Change-Id: I73353d00ccf0952b38c0f8ef7d1755c15cbfe9d9 * mv to nodeselection pkg Change-Id: I62487f768296c7a7b597fa398a4c42daf6e9c5b7 * add state to cache Change-Id: I081e77ec0e16706faee1a267de9a7fa643d6ac11 * add refresh concurrent test Change-Id: Idcba72508291099f280edc65355273c0acc3d3ce * add a few more tests Change-Id: I9422e9eaa22bf01c11f14bdb892ebcf7b3e5e5fb * fix tests, add min version to select allnodes Change-Id: I926f41d568951ad4ff70c6d4ceb87abb1e3e5009 * update comments Change-Id: I6ffe33e245ca65fb523c880cd72e63ce35776eb9 * fixes and rm Init Change-Id: Ifbe09b668978b5d9af09ca38cb080d02a2154cf4 * fix format Change-Id: I03cc217e28dc1839190c5c6dbdbb602c132a5a38
196 lines
6.2 KiB
Go
196 lines
6.2 KiB
Go
// Copyright (C) 2019 Storj Labs, Inc.
|
|
// See LICENSE for copying information.
|
|
|
|
package overlay
|
|
|
|
import (
|
|
"context"
|
|
"math/rand"
|
|
"sync"
|
|
"time"
|
|
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
// CacheDB implements the database for overlay node selection cache
|
|
//
|
|
// architecture: Database
|
|
type CacheDB interface {
|
|
// SelectAllStorageNodesUpload returns all nodes that qualify to store data, organized as reputable nodes and new nodes
|
|
SelectAllStorageNodesUpload(ctx context.Context, selectionCfg NodeSelectionConfig) (reputable, new []*SelectedNode, err error)
|
|
}
|
|
|
|
// CacheConfig holds the settings of the overlay node selection cache.
type CacheConfig struct {
	// Staleness is how old the cached node lists may become before they are
	// considered stale.
	Staleness time.Duration `help:"how stale the node selection cache can be" releaseDefault:"3m" devDefault:"5m"`
}
|
|
|
|
// NodeSelectionCache keeps a list of all the storage nodes that are qualified to store data
|
|
// We organize the nodes by if they are reputable or a new node on the network.
|
|
// The cache will sync with the nodes table in the database and get refreshed once the staleness time has past.
|
|
type NodeSelectionCache struct {
|
|
log *zap.Logger
|
|
db CacheDB
|
|
selectionConfig NodeSelectionConfig
|
|
staleness time.Duration
|
|
|
|
mu sync.RWMutex
|
|
data *state
|
|
}
|
|
|
|
type state struct {
|
|
lastRefresh time.Time
|
|
|
|
mu sync.RWMutex
|
|
reputableNodes []*SelectedNode
|
|
newNodes []*SelectedNode
|
|
}
|
|
|
|
// NewNodeSelectionCache creates a new cache that keeps a list of all the storage nodes that are qualified to store data
|
|
func NewNodeSelectionCache(log *zap.Logger, db CacheDB, staleness time.Duration, config NodeSelectionConfig) *NodeSelectionCache {
|
|
return &NodeSelectionCache{
|
|
log: log,
|
|
db: db,
|
|
staleness: staleness,
|
|
selectionConfig: config,
|
|
data: &state{},
|
|
}
|
|
}
|
|
|
|
// Refresh populates the cache with all of the reputableNodes and newNode nodes
|
|
// This method is useful for tests
|
|
func (cache *NodeSelectionCache) Refresh(ctx context.Context) (err error) {
|
|
defer mon.Task()(&ctx)(&err)
|
|
_, err = cache.refresh(ctx)
|
|
return err
|
|
}
|
|
|
|
// refresh calls out to the database and refreshes the cache with the most up-to-date
|
|
// data from the nodes table, then sets time that the last refresh occurred so we know when
|
|
// to refresh again in the future
|
|
func (cache *NodeSelectionCache) refresh(ctx context.Context) (cachData *state, err error) {
|
|
defer mon.Task()(&ctx)(&err)
|
|
cache.mu.Lock()
|
|
defer cache.mu.Unlock()
|
|
|
|
if cache.data != nil && time.Since(cache.data.lastRefresh) <= cache.staleness {
|
|
return cache.data, nil
|
|
}
|
|
|
|
reputableNodes, newNodes, err := cache.db.SelectAllStorageNodesUpload(ctx, cache.selectionConfig)
|
|
if err != nil {
|
|
return cache.data, err
|
|
}
|
|
cache.data = &state{
|
|
lastRefresh: time.Now().UTC(),
|
|
reputableNodes: reputableNodes,
|
|
newNodes: newNodes,
|
|
}
|
|
|
|
mon.IntVal("refresh_cache_size_reputable").Observe(int64(len(reputableNodes)))
|
|
mon.IntVal("refresh_cache_size_new").Observe(int64(len(newNodes)))
|
|
return cache.data, nil
|
|
}
|
|
|
|
// GetNodes selects nodes from the cache that will be used to upload a file.
|
|
// Every node selected will be from a distinct network.
|
|
// If the cache hasn't been refreshed recently it will do so first.
|
|
func (cache *NodeSelectionCache) GetNodes(ctx context.Context, req FindStorageNodesRequest) (_ []*SelectedNode, err error) {
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
cache.mu.RLock()
|
|
cacheData := cache.data
|
|
cache.mu.RUnlock()
|
|
|
|
// if the cache is stale, then refresh it before we get nodes
|
|
if time.Since(cacheData.lastRefresh) > cache.staleness {
|
|
cacheData, err = cache.refresh(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
return cacheData.GetNodes(ctx, req, cache.selectionConfig.NewNodeFraction)
|
|
}
|
|
|
|
// GetNodes selects nodes from the cache that will be used to upload a file.
|
|
// If there are new nodes in the cache, we will return a small fraction of those
|
|
// and then return mostly reputable nodes
|
|
func (cacheData *state) GetNodes(ctx context.Context, req FindStorageNodesRequest, newNodeFraction float64) (_ []*SelectedNode, err error) {
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
cacheData.mu.RLock()
|
|
defer cacheData.mu.RUnlock()
|
|
|
|
// how many reputableNodes versus newNode nodes should be selected
|
|
totalcount := req.RequestedCount
|
|
newNodeCount := int(float64(req.RequestedCount) * newNodeFraction)
|
|
|
|
var selectedNodeResults = []*SelectedNode{}
|
|
var distinctNetworks = map[string]struct{}{}
|
|
|
|
// Get a random selection of new nodes out of the cache first so that if there aren't
|
|
// enough new nodes on the network, we can fall back to using reputable nodes instead
|
|
randomIndexes := rand.Perm(len(cacheData.newNodes))
|
|
for _, idx := range randomIndexes {
|
|
currNode := cacheData.newNodes[idx]
|
|
if _, ok := distinctNetworks[currNode.LastNet]; ok {
|
|
continue
|
|
}
|
|
for _, excludedID := range req.ExcludedIDs {
|
|
if excludedID == currNode.ID {
|
|
continue
|
|
}
|
|
}
|
|
|
|
selectedNodeResults = append(selectedNodeResults, currNode)
|
|
distinctNetworks[currNode.LastNet] = struct{}{}
|
|
if len(selectedNodeResults) >= newNodeCount {
|
|
break
|
|
}
|
|
}
|
|
|
|
randomIndexes = rand.Perm(len(cacheData.reputableNodes))
|
|
for _, idx := range randomIndexes {
|
|
currNode := cacheData.reputableNodes[idx]
|
|
|
|
// don't select a node if we've already selected another node from the same network
|
|
if _, ok := distinctNetworks[currNode.LastNet]; ok {
|
|
continue
|
|
}
|
|
// don't select a node listed in the excluded list
|
|
for _, excludedID := range req.ExcludedIDs {
|
|
if excludedID == currNode.ID {
|
|
continue
|
|
}
|
|
}
|
|
|
|
selectedNodeResults = append(selectedNodeResults, currNode)
|
|
distinctNetworks[currNode.LastNet] = struct{}{}
|
|
if len(selectedNodeResults) >= totalcount {
|
|
break
|
|
}
|
|
}
|
|
|
|
if len(selectedNodeResults) < totalcount {
|
|
return nil, Error.New("unable to select enough nodes from node selection cache, needed: %d, actual: %d",
|
|
totalcount, len(selectedNodeResults),
|
|
)
|
|
}
|
|
return selectedNodeResults, nil
|
|
}
|
|
|
|
// Size returns how many reputable nodes and new nodes are in the cache
|
|
func (cache *NodeSelectionCache) Size() (reputableNodeCount int, newNodeCount int) {
|
|
cache.mu.RLock()
|
|
cacheData := cache.data
|
|
cache.mu.RUnlock()
|
|
return cacheData.size()
|
|
}
|
|
|
|
func (cacheData *state) size() (reputableNodeCount int, newNodeCount int) {
|
|
cacheData.mu.RLock()
|
|
defer cacheData.mu.RUnlock()
|
|
return len(cacheData.reputableNodes), len(cacheData.newNodes)
|
|
}
|