de4559d862
Optimization by reusing more slices.

Benchmark result:

name                              old time/op    new time/op    delta
RemoteSegment/healthy_segment-8     33.2µs ± 1%    31.4µs ± 6%   -5.49%  (p=0.032 n=4+5)

name                              old alloc/op   new alloc/op   delta
RemoteSegment/healthy_segment-8     15.9kB ± 0%    10.2kB ± 0%  -35.92%  (p=0.008 n=5+5)

name                              old allocs/op  new allocs/op  delta
RemoteSegment/healthy_segment-8        280 ± 0%       250 ± 0%  -10.71%  (p=0.008 n=5+5)

Change-Id: I60462169285462dee6cd16d4f4ce1f30fb6cdfdf
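To make the slice reuse concrete, here is a minimal caller-side sketch (not part of this change; checkBatches, batches, and asOf are illustrative names) showing one selectedNodes buffer being handed to GetNodes on every call, which is the kind of reuse the allocs/op numbers above reflect:

// checkBatches is a hypothetical helper: it reuses a single result buffer
// across GetNodes calls instead of allocating a fresh slice per batch.
func checkBatches(ctx context.Context, cache *ReliabilityCache, asOf time.Time, batches [][]storj.NodeID) error {
	var selectedNodes []nodeselection.SelectedNode // reused across iterations
	for _, nodeIDs := range batches {
		// Grow the buffer only when a batch is larger than anything seen so far;
		// GetNodes requires len(selectedNodes) == len(nodeIDs).
		if cap(selectedNodes) < len(nodeIDs) {
			selectedNodes = make([]nodeselection.SelectedNode, len(nodeIDs))
		}
		nodes, err := cache.GetNodes(ctx, asOf, nodeIDs, selectedNodes[:len(nodeIDs)])
		if err != nil {
			return err
		}
		_ = nodes // evaluate piece reliability for this batch here
	}
	return nil
}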
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.

package checker

import (
	"context"
	"sync"
	"sync/atomic"
	"time"

	"github.com/zeebo/errs"

	"storj.io/common/storj"
	"storj.io/storj/satellite/nodeselection"
	"storj.io/storj/satellite/overlay"
)

// ReliabilityCache caches known nodes for the specified staleness duration
// and updates automatically from overlay.
//
// architecture: Service
type ReliabilityCache struct {
	overlay   *overlay.Service
	staleness time.Duration
	mu        sync.Mutex
	state     atomic.Value // contains immutable *reliabilityState
}
// reliabilityState is an immutable snapshot of the participating nodes, keyed by node ID.
type reliabilityState struct {
	nodeByID map[storj.NodeID]nodeselection.SelectedNode
	created  time.Time
}
// NewReliabilityCache creates a new reliability checking cache.
func NewReliabilityCache(overlay *overlay.Service, staleness time.Duration) *ReliabilityCache {
	return &ReliabilityCache{
		overlay:   overlay,
		staleness: staleness,
	}
}

// LastUpdate returns when the cache was last updated, or the zero value (time.Time{}) if it
// has never yet been updated. LastUpdate() does not trigger an update itself.
func (cache *ReliabilityCache) LastUpdate() time.Time {
	if state, ok := cache.state.Load().(*reliabilityState); ok {
		return state.created
	}
	return time.Time{}
}

// NumNodes returns the number of online active nodes (as determined by the reliability cache).
// This number is not guaranteed to be consistent with either the nodes database or the
// reliability cache after returning; it is just a best-effort count and should be treated as an
// estimate.
func (cache *ReliabilityCache) NumNodes(ctx context.Context) (numNodes int, err error) {
	state, err := cache.loadFast(ctx, time.Time{})
	if err != nil {
		return 0, err
	}

	return len(state.nodeByID), nil
}
// GetNodes gets the cached SelectedNode records (valid as of the given time) for each of
// the requested node IDs, and returns them in order. If a node is not in the reliability
// cache (that is, it is unknown or disqualified), an empty SelectedNode will be returned
// for the index corresponding to that node ID.
// The provided selectedNodes slice is filled with the results and returned; its length
// must be equal to the length of nodeIDs.
func (cache *ReliabilityCache) GetNodes(ctx context.Context, validUpTo time.Time, nodeIDs []storj.NodeID, selectedNodes []nodeselection.SelectedNode) ([]nodeselection.SelectedNode, error) {
	state, err := cache.loadFast(ctx, validUpTo)
	if err != nil {
		return nil, err
	}

	if len(nodeIDs) != len(selectedNodes) {
		return nil, errs.New("nodeIDs length must be equal to selectedNodes: want %d have %d", len(nodeIDs), len(selectedNodes))
	}

	for i, nodeID := range nodeIDs {
		selectedNodes[i] = state.nodeByID[nodeID]
	}
	return selectedNodes, nil
}
func (cache *ReliabilityCache) loadFast(ctx context.Context, validUpTo time.Time) (_ *reliabilityState, err error) {
	// This code is designed to be very fast in the case where a refresh is not needed: just an
	// atomic load from a rarely written bit of shared memory. The general strategy is to first
	// check whether the current state suffices to answer the query. If not (because it does not
	// exist, is too stale, etc.), we acquire the mutex to block other requests that may be stale
	// and to ensure we only issue one refresh at a time. After acquiring the mutex, we have to
	// double-check that the state is still stale, because some other call may have beaten us to
	// the acquisition. Only then do we refresh and answer the query.

	state, ok := cache.state.Load().(*reliabilityState)
	if !ok || validUpTo.After(state.created) || time.Since(state.created) > cache.staleness {
		cache.mu.Lock()
		state, ok = cache.state.Load().(*reliabilityState)
		if !ok || validUpTo.After(state.created) || time.Since(state.created) > cache.staleness {
			state, err = cache.refreshLocked(ctx)
		}
		cache.mu.Unlock()
		if err != nil {
			return nil, err
		}
	}
	return state, nil
}
// Refresh refreshes the cache.
func (cache *ReliabilityCache) Refresh(ctx context.Context) (err error) {
	defer mon.Task()(&ctx)(&err)

	cache.mu.Lock()
	defer cache.mu.Unlock()

	_, err = cache.refreshLocked(ctx)
	return err
}
// refreshLocked refreshes the cache, assuming the caller holds the mutex.
func (cache *ReliabilityCache) refreshLocked(ctx context.Context) (_ *reliabilityState, err error) {
	defer mon.Task()(&ctx)(&err)

	selectedNodes, err := cache.overlay.GetParticipatingNodes(ctx)
	if err != nil {
		return nil, Error.Wrap(err)
	}

	state := &reliabilityState{
		created:  time.Now(),
		nodeByID: make(map[storj.NodeID]nodeselection.SelectedNode, len(selectedNodes)),
	}
	for _, node := range selectedNodes {
		state.nodeByID[node.ID] = node
	}

	cache.state.Store(state)
	return state, nil
}
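For completeness, a hypothetical usage sketch (not part of the file above; exampleUse and the five-minute staleness window are assumptions) wiring the cache to an overlay.Service and querying it with a caller-supplied buffer:

func exampleUse(ctx context.Context, overlayService *overlay.Service, ids []storj.NodeID, asOf time.Time) error {
	cache := NewReliabilityCache(overlayService, 5*time.Minute)

	// Optionally warm the cache up front; otherwise the first GetNodes call
	// triggers the refresh through loadFast.
	if err := cache.Refresh(ctx); err != nil {
		return err
	}

	buf := make([]nodeselection.SelectedNode, len(ids))
	nodes, err := cache.GetNodes(ctx, asOf, ids, buf)
	if err != nil {
		return err
	}

	known := 0
	for _, node := range nodes {
		// Nodes missing from the cache come back as a zero-valued SelectedNode.
		if !node.ID.IsZero() {
			known++
		}
	}
	_ = known // feed into segment health / repair decisions
	return nil
}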