satellite/metainfo/metabase: refresh alias cache only once
When there are concurrent refreshes to the cache and the entries are missing, it could end up causing multiple database calls, even though only one is needed. Change-Id: I1ae7a124bbdd1570473cf3a032d375d2f25a8426
This commit is contained in:
parent
259b030b3a
commit
61f0fb67a9
@ -6,6 +6,7 @@ package metabase
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"storj.io/common/storj"
|
||||
)
|
||||
@ -18,26 +19,29 @@ type NodeAliasDB interface {
|
||||
|
||||
// NodeAliasCache is a write-through cache for looking up node ID and alias mapping.
|
||||
type NodeAliasCache struct {
|
||||
mu sync.Mutex
|
||||
latest *NodeAliasMap
|
||||
db NodeAliasDB
|
||||
db NodeAliasDB
|
||||
refreshing sync.Mutex
|
||||
latest atomic.Value // *NodeAliasMap
|
||||
}
|
||||
|
||||
// NewNodeAliasCache creates a new cache using the specified database.
|
||||
func NewNodeAliasCache(db NodeAliasDB) *NodeAliasCache {
|
||||
return &NodeAliasCache{
|
||||
db: db,
|
||||
latest: NewNodeAliasMap(nil),
|
||||
cache := &NodeAliasCache{
|
||||
db: db,
|
||||
}
|
||||
cache.latest.Store(NewNodeAliasMap(nil))
|
||||
return cache
|
||||
}
|
||||
|
||||
func (cache *NodeAliasCache) getLatest() *NodeAliasMap {
|
||||
return cache.latest.Load().(*NodeAliasMap)
|
||||
}
|
||||
|
||||
// Nodes returns node ID-s corresponding to the aliases,
|
||||
// refreshing the cache once when an alias is missing.
|
||||
// This results in an error when the alias is not in the database.
|
||||
func (cache *NodeAliasCache) Nodes(ctx context.Context, aliases []NodeAlias) ([]storj.NodeID, error) {
|
||||
cache.mu.Lock()
|
||||
latest := cache.latest
|
||||
cache.mu.Unlock()
|
||||
latest := cache.getLatest()
|
||||
|
||||
nodes, missing := latest.Nodes(aliases)
|
||||
if len(missing) == 0 {
|
||||
@ -46,7 +50,7 @@ func (cache *NodeAliasCache) Nodes(ctx context.Context, aliases []NodeAlias) ([]
|
||||
|
||||
if len(missing) > 0 {
|
||||
var err error
|
||||
latest, err = cache.refresh(ctx)
|
||||
latest, err = cache.refresh(ctx, nil, missing)
|
||||
if err != nil {
|
||||
return nil, Error.New("failed to refresh node alias db: %w", err)
|
||||
}
|
||||
@ -63,9 +67,7 @@ func (cache *NodeAliasCache) Nodes(ctx context.Context, aliases []NodeAlias) ([]
|
||||
// Aliases returns node aliases corresponding to the node ID-s,
|
||||
// adding missing node ID-s to the database when needed.
|
||||
func (cache *NodeAliasCache) Aliases(ctx context.Context, nodes []storj.NodeID) ([]NodeAlias, error) {
|
||||
cache.mu.Lock()
|
||||
latest := cache.latest
|
||||
cache.mu.Unlock()
|
||||
latest := cache.getLatest()
|
||||
|
||||
aliases, missing := latest.Aliases(nodes)
|
||||
if len(missing) == 0 {
|
||||
@ -97,33 +99,39 @@ func (cache *NodeAliasCache) ensure(ctx context.Context, missing ...storj.NodeID
|
||||
}); err != nil {
|
||||
return nil, Error.New("failed to update node alias db: %w", err)
|
||||
}
|
||||
return cache.refresh(ctx)
|
||||
return cache.refresh(ctx, missing, nil)
|
||||
}
|
||||
|
||||
// refresh refreshses the state of the cache.
|
||||
func (cache *NodeAliasCache) refresh(ctx context.Context) (_ *NodeAliasMap, err error) {
|
||||
func (cache *NodeAliasCache) refresh(ctx context.Context, missingNodes []storj.NodeID, missingAliases []NodeAlias) (_ *NodeAliasMap, err error) {
|
||||
defer mon.Task()(&ctx)(&err)
|
||||
// TODO: allow only one inflight request
|
||||
|
||||
cache.refreshing.Lock()
|
||||
defer cache.refreshing.Unlock()
|
||||
|
||||
latest := cache.getLatest()
|
||||
|
||||
// Maybe some other goroutine already refreshed the list, double-check.
|
||||
if latest.ContainsAll(missingNodes, missingAliases) {
|
||||
return latest, nil
|
||||
}
|
||||
|
||||
entries, err := cache.db.ListNodeAliases(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
xs := NewNodeAliasMap(entries)
|
||||
|
||||
cache.mu.Lock()
|
||||
defer cache.mu.Unlock()
|
||||
|
||||
// Since we never remove node aliases we can assume that the alias map that contains more
|
||||
// entries is the latest one.
|
||||
//
|
||||
// Note: we merge the maps here rather than directly replacing.
|
||||
// This is not ideal from performance side, however it should reduce possible consistency issues.
|
||||
xs.Merge(cache.latest)
|
||||
cache.latest = xs
|
||||
|
||||
return cache.latest, nil
|
||||
xs := NewNodeAliasMap(entries)
|
||||
xs.Merge(latest)
|
||||
cache.latest.Store(xs)
|
||||
|
||||
return xs, nil
|
||||
}
|
||||
|
||||
// ConvertPiecesToAliases converts pieces to alias pieces.
|
||||
@ -227,6 +235,21 @@ func (m *NodeAliasMap) Aliases(nodes []storj.NodeID) (xs []NodeAlias, missing []
|
||||
return xs, missing
|
||||
}
|
||||
|
||||
// ContainsAll returns true when the table contains all entries.
|
||||
func (m *NodeAliasMap) ContainsAll(nodeIDs []storj.NodeID, nodeAliases []NodeAlias) bool {
|
||||
for _, id := range nodeIDs {
|
||||
if _, ok := m.alias[id]; !ok {
|
||||
return false
|
||||
}
|
||||
}
|
||||
for _, alias := range nodeAliases {
|
||||
if _, ok := m.node[alias]; !ok {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Size returns the number of entries in this map.
|
||||
func (m *NodeAliasMap) Size() int {
|
||||
if m == nil {
|
||||
|
@ -7,9 +7,11 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
"storj.io/common/storj"
|
||||
"storj.io/common/testcontext"
|
||||
@ -66,6 +68,72 @@ func TestNodeAliasCache(t *testing.T) {
|
||||
require.EqualError(t, err, "metabase: failed to refresh node alias db: io.EOF")
|
||||
require.Empty(t, nodes)
|
||||
})
|
||||
|
||||
t.Run("Aliases refresh once", func(t *testing.T) {
|
||||
for repeat := 0; repeat < 3; repeat++ {
|
||||
database := &NodeAliasDB{}
|
||||
cache := metabase.NewNodeAliasCache(database)
|
||||
n1, n2 := testrand.NodeID(), testrand.NodeID()
|
||||
|
||||
start := make(chan struct{})
|
||||
const N = 4
|
||||
var waiting sync.WaitGroup
|
||||
waiting.Add(N)
|
||||
|
||||
var group errgroup.Group
|
||||
for k := 0; k < N; k++ {
|
||||
group.Go(func() error {
|
||||
waiting.Done()
|
||||
<-start
|
||||
|
||||
_, err := cache.Aliases(ctx, []storj.NodeID{n1, n2})
|
||||
return err
|
||||
})
|
||||
}
|
||||
|
||||
waiting.Wait()
|
||||
close(start)
|
||||
require.NoError(t, group.Wait())
|
||||
|
||||
require.Equal(t, int64(1), database.ListNodeAliasesCount())
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Nodes refresh once", func(t *testing.T) {
|
||||
for repeat := 0; repeat < 3; repeat++ {
|
||||
n1, n2 := testrand.NodeID(), testrand.NodeID()
|
||||
|
||||
database := &NodeAliasDB{}
|
||||
err := database.EnsureNodeAliases(ctx, metabase.EnsureNodeAliases{
|
||||
Nodes: []storj.NodeID{n1, n2},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cache := metabase.NewNodeAliasCache(database)
|
||||
|
||||
start := make(chan struct{})
|
||||
const N = 4
|
||||
var waiting sync.WaitGroup
|
||||
waiting.Add(N)
|
||||
|
||||
var group errgroup.Group
|
||||
for k := 0; k < N; k++ {
|
||||
group.Go(func() error {
|
||||
waiting.Done()
|
||||
<-start
|
||||
|
||||
_, err := cache.Nodes(ctx, []metabase.NodeAlias{1, 2})
|
||||
return err
|
||||
})
|
||||
}
|
||||
|
||||
waiting.Wait()
|
||||
close(start)
|
||||
require.NoError(t, group.Wait())
|
||||
|
||||
require.Equal(t, int64(1), database.ListNodeAliasesCount())
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestNodeAliasCache_DB(t *testing.T) {
|
||||
@ -204,6 +272,9 @@ type NodeAliasDB struct {
|
||||
fail error
|
||||
last metabase.NodeAlias
|
||||
entries []metabase.NodeAliasEntry
|
||||
|
||||
ensureNodeAliasesCount int64
|
||||
listNodeAliasesCount int64
|
||||
}
|
||||
|
||||
func (db *NodeAliasDB) SetFail(err error) {
|
||||
@ -236,6 +307,8 @@ func (db *NodeAliasDB) Ensure(id storj.NodeID) {
|
||||
}
|
||||
|
||||
func (db *NodeAliasDB) EnsureNodeAliases(ctx context.Context, opts metabase.EnsureNodeAliases) error {
|
||||
atomic.AddInt64(&db.ensureNodeAliasesCount, 1)
|
||||
|
||||
if err := db.ShouldFail(); err != nil {
|
||||
return err
|
||||
}
|
||||
@ -245,7 +318,13 @@ func (db *NodeAliasDB) EnsureNodeAliases(ctx context.Context, opts metabase.Ensu
|
||||
return nil
|
||||
}
|
||||
|
||||
func (db *NodeAliasDB) EnsureNodeAliasesCount() int64 {
|
||||
return atomic.LoadInt64(&db.ensureNodeAliasesCount)
|
||||
}
|
||||
|
||||
func (db *NodeAliasDB) ListNodeAliases(ctx context.Context) (_ []metabase.NodeAliasEntry, err error) {
|
||||
atomic.AddInt64(&db.listNodeAliasesCount, 1)
|
||||
|
||||
if err := db.ShouldFail(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -256,3 +335,7 @@ func (db *NodeAliasDB) ListNodeAliases(ctx context.Context) (_ []metabase.NodeAl
|
||||
|
||||
return xs, nil
|
||||
}
|
||||
|
||||
func (db *NodeAliasDB) ListNodeAliasesCount() int64 {
|
||||
return atomic.LoadInt64(&db.listNodeAliasesCount)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user