satellite/metainfo/metabase: refresh alias cache only once

When there are concurrent refreshes to the cache and the entries are
missing, it could end up causing multiple database calls, even though
only one is needed.

Change-Id: I1ae7a124bbdd1570473cf3a032d375d2f25a8426
This commit is contained in:
Egon Elbre 2021-02-16 14:20:38 +02:00
parent 259b030b3a
commit 61f0fb67a9
2 changed files with 130 additions and 24 deletions

View File

@ -6,6 +6,7 @@ package metabase
import (
"context"
"sync"
"sync/atomic"
"storj.io/common/storj"
)
@ -18,26 +19,29 @@ type NodeAliasDB interface {
// NodeAliasCache is a write-through cache for looking up node ID and alias mapping.
type NodeAliasCache struct {
mu sync.Mutex
latest *NodeAliasMap
db NodeAliasDB
db NodeAliasDB
refreshing sync.Mutex
latest atomic.Value // *NodeAliasMap
}
// NewNodeAliasCache creates a new cache using the specified database.
func NewNodeAliasCache(db NodeAliasDB) *NodeAliasCache {
return &NodeAliasCache{
db: db,
latest: NewNodeAliasMap(nil),
cache := &NodeAliasCache{
db: db,
}
cache.latest.Store(NewNodeAliasMap(nil))
return cache
}
func (cache *NodeAliasCache) getLatest() *NodeAliasMap {
return cache.latest.Load().(*NodeAliasMap)
}
// Nodes returns node ID-s corresponding to the aliases,
// refreshing the cache once when an alias is missing.
// This results in an error when the alias is not in the database.
func (cache *NodeAliasCache) Nodes(ctx context.Context, aliases []NodeAlias) ([]storj.NodeID, error) {
cache.mu.Lock()
latest := cache.latest
cache.mu.Unlock()
latest := cache.getLatest()
nodes, missing := latest.Nodes(aliases)
if len(missing) == 0 {
@ -46,7 +50,7 @@ func (cache *NodeAliasCache) Nodes(ctx context.Context, aliases []NodeAlias) ([]
if len(missing) > 0 {
var err error
latest, err = cache.refresh(ctx)
latest, err = cache.refresh(ctx, nil, missing)
if err != nil {
return nil, Error.New("failed to refresh node alias db: %w", err)
}
@ -63,9 +67,7 @@ func (cache *NodeAliasCache) Nodes(ctx context.Context, aliases []NodeAlias) ([]
// Aliases returns node aliases corresponding to the node ID-s,
// adding missing node ID-s to the database when needed.
func (cache *NodeAliasCache) Aliases(ctx context.Context, nodes []storj.NodeID) ([]NodeAlias, error) {
cache.mu.Lock()
latest := cache.latest
cache.mu.Unlock()
latest := cache.getLatest()
aliases, missing := latest.Aliases(nodes)
if len(missing) == 0 {
@ -97,33 +99,39 @@ func (cache *NodeAliasCache) ensure(ctx context.Context, missing ...storj.NodeID
}); err != nil {
return nil, Error.New("failed to update node alias db: %w", err)
}
return cache.refresh(ctx)
return cache.refresh(ctx, missing, nil)
}
// refresh refreshses the state of the cache.
func (cache *NodeAliasCache) refresh(ctx context.Context) (_ *NodeAliasMap, err error) {
func (cache *NodeAliasCache) refresh(ctx context.Context, missingNodes []storj.NodeID, missingAliases []NodeAlias) (_ *NodeAliasMap, err error) {
defer mon.Task()(&ctx)(&err)
// TODO: allow only one inflight request
cache.refreshing.Lock()
defer cache.refreshing.Unlock()
latest := cache.getLatest()
// Maybe some other goroutine already refreshed the list, double-check.
if latest.ContainsAll(missingNodes, missingAliases) {
return latest, nil
}
entries, err := cache.db.ListNodeAliases(ctx)
if err != nil {
return nil, err
}
xs := NewNodeAliasMap(entries)
cache.mu.Lock()
defer cache.mu.Unlock()
// Since we never remove node aliases we can assume that the alias map that contains more
// entries is the latest one.
//
// Note: we merge the maps here rather than directly replacing.
// This is not ideal from performance side, however it should reduce possible consistency issues.
xs.Merge(cache.latest)
cache.latest = xs
return cache.latest, nil
xs := NewNodeAliasMap(entries)
xs.Merge(latest)
cache.latest.Store(xs)
return xs, nil
}
// ConvertPiecesToAliases converts pieces to alias pieces.
@ -227,6 +235,21 @@ func (m *NodeAliasMap) Aliases(nodes []storj.NodeID) (xs []NodeAlias, missing []
return xs, missing
}
// ContainsAll returns true when the table contains all entries.
func (m *NodeAliasMap) ContainsAll(nodeIDs []storj.NodeID, nodeAliases []NodeAlias) bool {
for _, id := range nodeIDs {
if _, ok := m.alias[id]; !ok {
return false
}
}
for _, alias := range nodeAliases {
if _, ok := m.node[alias]; !ok {
return false
}
}
return true
}
// Size returns the number of entries in this map.
func (m *NodeAliasMap) Size() int {
if m == nil {

View File

@ -7,9 +7,11 @@ import (
"context"
"errors"
"sync"
"sync/atomic"
"testing"
"github.com/stretchr/testify/require"
"golang.org/x/sync/errgroup"
"storj.io/common/storj"
"storj.io/common/testcontext"
@ -66,6 +68,72 @@ func TestNodeAliasCache(t *testing.T) {
require.EqualError(t, err, "metabase: failed to refresh node alias db: io.EOF")
require.Empty(t, nodes)
})
t.Run("Aliases refresh once", func(t *testing.T) {
for repeat := 0; repeat < 3; repeat++ {
database := &NodeAliasDB{}
cache := metabase.NewNodeAliasCache(database)
n1, n2 := testrand.NodeID(), testrand.NodeID()
start := make(chan struct{})
const N = 4
var waiting sync.WaitGroup
waiting.Add(N)
var group errgroup.Group
for k := 0; k < N; k++ {
group.Go(func() error {
waiting.Done()
<-start
_, err := cache.Aliases(ctx, []storj.NodeID{n1, n2})
return err
})
}
waiting.Wait()
close(start)
require.NoError(t, group.Wait())
require.Equal(t, int64(1), database.ListNodeAliasesCount())
}
})
t.Run("Nodes refresh once", func(t *testing.T) {
for repeat := 0; repeat < 3; repeat++ {
n1, n2 := testrand.NodeID(), testrand.NodeID()
database := &NodeAliasDB{}
err := database.EnsureNodeAliases(ctx, metabase.EnsureNodeAliases{
Nodes: []storj.NodeID{n1, n2},
})
require.NoError(t, err)
cache := metabase.NewNodeAliasCache(database)
start := make(chan struct{})
const N = 4
var waiting sync.WaitGroup
waiting.Add(N)
var group errgroup.Group
for k := 0; k < N; k++ {
group.Go(func() error {
waiting.Done()
<-start
_, err := cache.Nodes(ctx, []metabase.NodeAlias{1, 2})
return err
})
}
waiting.Wait()
close(start)
require.NoError(t, group.Wait())
require.Equal(t, int64(1), database.ListNodeAliasesCount())
}
})
}
func TestNodeAliasCache_DB(t *testing.T) {
@ -204,6 +272,9 @@ type NodeAliasDB struct {
fail error
last metabase.NodeAlias
entries []metabase.NodeAliasEntry
ensureNodeAliasesCount int64
listNodeAliasesCount int64
}
func (db *NodeAliasDB) SetFail(err error) {
@ -236,6 +307,8 @@ func (db *NodeAliasDB) Ensure(id storj.NodeID) {
}
func (db *NodeAliasDB) EnsureNodeAliases(ctx context.Context, opts metabase.EnsureNodeAliases) error {
atomic.AddInt64(&db.ensureNodeAliasesCount, 1)
if err := db.ShouldFail(); err != nil {
return err
}
@ -245,7 +318,13 @@ func (db *NodeAliasDB) EnsureNodeAliases(ctx context.Context, opts metabase.Ensu
return nil
}
func (db *NodeAliasDB) EnsureNodeAliasesCount() int64 {
return atomic.LoadInt64(&db.ensureNodeAliasesCount)
}
func (db *NodeAliasDB) ListNodeAliases(ctx context.Context) (_ []metabase.NodeAliasEntry, err error) {
atomic.AddInt64(&db.listNodeAliasesCount, 1)
if err := db.ShouldFail(); err != nil {
return nil, err
}
@ -256,3 +335,7 @@ func (db *NodeAliasDB) ListNodeAliases(ctx context.Context) (_ []metabase.NodeAl
return xs, nil
}
func (db *NodeAliasDB) ListNodeAliasesCount() int64 {
return atomic.LoadInt64(&db.listNodeAliasesCount)
}