storj/satellite/gracefulexit/pathcollector.go
Ivan Fraixedes 7fb86617fc satellite/satellitedb: Use CRDB AS OF SYSTEM & batch for GE
Use the 'AS OF SYSTEM TIME' CockroachDB clause for the Graceful Exit
(a.k.a. GE) queries that count and delete the GE queue items of nodes
which have already exited the network.
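
A minimal sketch of what such a follower-read count can look like; the
table name, interval, db handle, and helper are illustrative assumptions
(imports elided), not lifted from this change:

func countExitedQueueItems(ctx context.Context, db *sql.DB, nodeID storj.NodeID) (count int64, err error) {
	// AS OF SYSTEM TIME lets CRDB answer from a slightly stale snapshot,
	// so the count does not contend with ongoing writes to the queue.
	err = db.QueryRowContext(ctx,
		`SELECT count(*)
		 FROM graceful_exit_transfer_queue AS OF SYSTEM TIME '-10s'
		 WHERE node_id = $1`,
		nodeID.Bytes(),
	).Scan(&count)
	return count, err
}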

Split the subquery used for deleting all the transfer queue items of
nodes which have exited when CRDB is used, and batch the queries,
because CRDB struggles to execute them in a single query, unlike
Postgres.
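
As a sketch of the per-node batched delete that runs after the exited
nodes have been resolved in a separate query (table, column, and helper
names are assumptions for illustration):

func deleteTransferQueueItemsBatched(ctx context.Context, db *sql.DB, nodeID storj.NodeID, batchSize int) error {
	for {
		// CRDB accepts LIMIT on DELETE, so each statement removes at most
		// batchSize rows instead of the node's whole queue at once.
		res, err := db.ExecContext(ctx,
			`DELETE FROM graceful_exit_transfer_queue WHERE node_id = $1 LIMIT $2`,
			nodeID.Bytes(), batchSize)
		if err != nil {
			return errs.Wrap(err)
		}
		affected, err := res.RowsAffected()
		if err != nil {
			return errs.Wrap(err)
		}
		if affected < int64(batchSize) {
			return nil
		}
	}
}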

The new test added in this commit to verify the CRDB batch logic for
deleting all the transfer queue items of the exited nodes revealed that
the Enqueue method also has to run in batches when CRDB is used,
otherwise CRDB returns the error "driver: bad connection" when a large
number of items is passed to be enqueued. This error didn't happen with
the current test implementation; it showed up with an initial one that
created a large number of exited nodes and transfer queue items for
those nodes.
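
The batching shape on the Enqueue side can be sketched roughly as below;
enqueueBatched and insertBatch are hypothetical names, while the real
change threads a batch size through to the satellitedb implementation:

func enqueueBatched(ctx context.Context, items []TransferQueueItem, batchSize int,
	insertBatch func(context.Context, []TransferQueueItem) error) error {
	if batchSize <= 0 {
		batchSize = len(items)
	}
	for len(items) > 0 {
		// Cap each INSERT at batchSize rows so no single statement is large
		// enough to trigger "driver: bad connection" on CRDB.
		n := batchSize
		if n > len(items) {
			n = len(items)
		}
		if err := insertBatch(ctx, items[:n]); err != nil {
			return errs.Wrap(err)
		}
		items = items[n:]
	}
	return nil
}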

Change-Id: I6a099cdbc515a240596bc93141fea3182c2e50a9
2021-05-07 13:09:19 -04:00

// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.

package gracefulexit

import (
	"context"
	"sync"

	"github.com/zeebo/errs"
	"go.uber.org/zap"

	"storj.io/common/storj"
	"storj.io/storj/satellite/metabase/metaloop"
	"storj.io/uplink/private/eestream"
)
var _ metaloop.Observer = (*PathCollector)(nil)

// PathCollector uses the metainfo loop to add the keys of pieces held by
// exiting nodes to the graceful exit transfer queue.
//
// architecture: Observer
type PathCollector struct {
	db            DB
	nodeIDMutex   sync.Mutex
	nodeIDStorage map[storj.NodeID]int64
	buffer        []TransferQueueItem
	log           *zap.Logger
	batchSize     int
}
// NewPathCollector instantiates a path collector.
func NewPathCollector(db DB, nodeIDs storj.NodeIDList, log *zap.Logger, batchSize int) *PathCollector {
	buffer := make([]TransferQueueItem, 0, batchSize)
	collector := &PathCollector{
		db:        db,
		log:       log,
		buffer:    buffer,
		batchSize: batchSize,
	}

	if len(nodeIDs) > 0 {
		collector.nodeIDStorage = make(map[storj.NodeID]int64, len(nodeIDs))

		for _, nodeID := range nodeIDs {
			collector.nodeIDStorage[nodeID] = 0
		}
	}

	return collector
}
// LoopStarted is called at each start of a loop.
func (collector *PathCollector) LoopStarted(context.Context, metaloop.LoopInfo) (err error) {
	return nil
}

// Flush persists the current buffer items to the database.
func (collector *PathCollector) Flush(ctx context.Context) (err error) {
	return collector.flush(ctx, 1)
}
// RemoteSegment takes a remote segment found in metainfo and creates a graceful exit transfer queue item if it doesn't exist already.
func (collector *PathCollector) RemoteSegment(ctx context.Context, segment *metaloop.Segment) (err error) {
	if len(collector.nodeIDStorage) == 0 {
		return nil
	}

	collector.nodeIDMutex.Lock()
	defer collector.nodeIDMutex.Unlock()

	numPieces := len(segment.Pieces)
	key := segment.Location.Encode()
	for _, piece := range segment.Pieces {
		if _, ok := collector.nodeIDStorage[piece.StorageNode]; !ok {
			continue
		}

		redundancy, err := eestream.NewRedundancyStrategyFromStorj(segment.Redundancy)
		if err != nil {
			return err
		}
		pieceSize := eestream.CalcPieceSize(int64(segment.EncryptedSize), redundancy)
		collector.nodeIDStorage[piece.StorageNode] += pieceSize

		item := TransferQueueItem{
			NodeID:          piece.StorageNode,
			Key:             key,
			PieceNum:        int32(piece.Number),
			RootPieceID:     segment.RootPieceID,
			DurabilityRatio: float64(numPieces) / float64(segment.Redundancy.TotalShares),
		}

		collector.log.Debug("adding piece to transfer queue.", zap.Stringer("Node ID", piece.StorageNode),
			zap.ByteString("key", key), zap.Uint16("piece num", piece.Number),
			zap.Int("num pieces", numPieces), zap.Int16("total possible pieces", segment.Redundancy.TotalShares))

		collector.buffer = append(collector.buffer, item)
		err = collector.flush(ctx, collector.batchSize)
		if err != nil {
			return err
		}
	}

	return nil
}
// Object returns nil; the collector only needs the remote segments of exiting nodes.
func (collector *PathCollector) Object(ctx context.Context, object *metaloop.Object) (err error) {
	return nil
}

// InlineSegment returns nil; inline segments are not stored on storage nodes,
// so they never need to be transferred during graceful exit.
func (collector *PathCollector) InlineSegment(ctx context.Context, segment *metaloop.Segment) (err error) {
	return nil
}
// flush persists the buffered transfer queue items to the database once the
// buffer holds at least limit items, then resets the buffer.
func (collector *PathCollector) flush(ctx context.Context, limit int) (err error) {
	if len(collector.buffer) >= limit {
		err = collector.db.Enqueue(ctx, collector.buffer, collector.batchSize)
		collector.buffer = collector.buffer[:0]
		return errs.Wrap(err)
	}

	return nil
}