storj/satellite/orders/orders_write_cache.go
Jeff Wendling 78c6d5bb32 satellite/satellitedb: reported_serials table for processing orders
this commit introduces the reported_serials table. its purpose is
to allow for blind writes into it as nodes report in so that we have
minimal contention. in order to continue to accurately account for
used bandwidth, though, we cannot immediately add the settled amount.
if we did, we would have to give up on blind writes.

the table's primary key is structured precisely so that we can quickly
find expired orders and so that we maximally benefit from rocksdb
path prefix compression. we do this by rounding the expires at time
forward to the next day, effectively giving us storagenode petnames
for free. and since there's no secondary index or foreign key
constraints, this design should use significantly less space than
the current used_serials table while also reducing contention.

after inserting the orders into the table, we have a chore that
periodically consumes all of the expired orders in it and inserts
them into the existing rollups tables. this is as if we changed
the nodes to report as the order expired rather than as soon as
possible, so the belief in correctness of the refactor is higher.

since we are able to process large batches of orders (typically
a day's worth), we can use the code to maximally batch inserts into
the rollup tables to make inserts as friendly as possible to
cockroach.

Change-Id: I25d609ca2679b8331979184f16c6d46d4f74c1a6
2020-01-15 19:21:21 -07:00

178 lines
5.1 KiB
Go

// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package orders
import (
"context"
"sync"
"time"
"github.com/skyrings/skyring-common/tools/uuid"
"go.uber.org/zap"
"storj.io/common/pb"
"storj.io/common/sync2"
)
// CacheData holds the running totals accumulated for a single bucket
// bandwidth rollup: the inline amount and the allocated amount.
type CacheData struct {
	Inline, Allocated int64
}
// CacheKey is the key of the RollupData map. It identifies one pending
// rollup entry by project, bucket name and piece action.
type CacheKey struct {
ProjectID uuid.UUID
// BucketName is stored as a string; updateCacheValue converts the []byte
// bucket name it receives before building the key.
BucketName string
Action pb.PieceAction
}
// RollupData contains the pending rollups waiting to be flushed to the db,
// holding one accumulated CacheData total per CacheKey.
type RollupData map[CacheKey]CacheData
// RollupsWriteCache stores information needed to update bucket bandwidth rollups.
// It embeds DB and accumulates bandwidth amounts in memory, handing them to the
// database as one batch once batchSize distinct keys are pending or when
// FlushToDB is called.
type RollupsWriteCache struct {
DB
// batchSize is the number of distinct pending keys at which a flush is started.
batchSize int
// currentSize counts the distinct keys added to pendingRollups since the last reset.
currentSize int
// latestTime is the most recent interval start seen since the last reset;
// it is passed to the database together with the flushed batch.
latestTime time.Time
log *zap.Logger
// mu is held around every access to currentSize, latestTime, pendingRollups
// and nextFlushCompletion made by the methods of this type.
mu sync.Mutex
pendingRollups RollupData
// nextFlushCompletion is released, and replaced with a fresh fence, each time
// a flush finishes.
nextFlushCompletion *sync2.Fence
}
// NewRollupsWriteCache creates a RollupsWriteCache that wraps db, logs to log
// and starts a flush once batchSize distinct keys are pending. The returned
// cache starts with an empty pending map and an unreleased flush fence.
func NewRollupsWriteCache(log *zap.Logger, db DB, batchSize int) *RollupsWriteCache {
	cache := new(RollupsWriteCache)
	cache.DB = db
	cache.log = log
	cache.batchSize = batchSize
	cache.pendingRollups = RollupData{}
	cache.nextFlushCompletion = &sync2.Fence{}
	return cache
}
// UpdateBucketBandwidthAllocation adds amount to the allocated total cached for
// the given project, bucket and action. intervalStart is converted to UTC
// before it is recorded. Nothing is written to the database here unless the
// update fills the batch, and the returned error is always nil.
func (cache *RollupsWriteCache) UpdateBucketBandwidthAllocation(ctx context.Context, projectID uuid.UUID, bucketName []byte, action pb.PieceAction, amount int64, intervalStart time.Time) error {
	var inline int64 // this call carries allocated bandwidth only
	startUTC := intervalStart.UTC()
	cache.updateCacheValue(ctx, projectID, bucketName, action, amount, inline, startUTC)
	return nil
}
// UpdateBucketBandwidthInline adds amount to the inline total cached for the
// given project, bucket and action. intervalStart is converted to UTC before
// it is recorded. Nothing is written to the database here unless the update
// fills the batch, and the returned error is always nil.
func (cache *RollupsWriteCache) UpdateBucketBandwidthInline(ctx context.Context, projectID uuid.UUID, bucketName []byte, action pb.PieceAction, amount int64, intervalStart time.Time) error {
	var allocated int64 // this call carries inline bandwidth only
	startUTC := intervalStart.UTC()
	cache.updateCacheValue(ctx, projectID, bucketName, action, allocated, amount, startUTC)
	return nil
}
// FlushToDB resets the cache and then flushes everything that was pending in
// the rollups write cache to the database. The reset happens under the lock;
// the database write itself runs on a separate goroutine, so this method
// returns without waiting for it (use OnNextFlush to wait).
func (cache *RollupsWriteCache) FlushToDB(ctx context.Context) {
	defer mon.Task()(&ctx)(nil)

	cache.mu.Lock()
	defer cache.mu.Unlock()

	// Detach the current batch and leave a fresh, empty state behind.
	batch, batchTime, batchLen := cache.pendingRollups, cache.latestTime, cache.currentSize
	cache.pendingRollups, cache.latestTime, cache.currentSize = make(RollupData), time.Time{}, 0

	go cache.flushToDB(ctx, batch, batchTime, batchLen)
}
// flushToDB writes one detached batch of pending rollups to the database in a
// single transaction and then releases the fence handed out by OnNextFlush.
//
// It is started on its own goroutine by FlushToDB and updateCacheValue, so the
// ctx it receives belongs to a caller that has usually already returned. The
// batch exists only in memory at this point; to keep a cancelled caller
// context from aborting the write, the transaction runs on a context that
// keeps ctx's values but ignores its cancellation and deadline.
//
// latestTime is passed to the database together with the whole batch. oldSize
// is only used as the capacity hint for the slice built from pendingRollups.
// A failed write is logged and the batch is dropped.
func (cache *RollupsWriteCache) flushToDB(ctx context.Context, pendingRollups RollupData, latestTime time.Time, oldSize int) {
	defer mon.Task()(&ctx)(nil)

	rollups := make([]BucketBandwidthRollup, 0, oldSize)
	for cacheKey, cacheData := range pendingRollups {
		rollups = append(rollups, BucketBandwidthRollup{
			ProjectID:  cacheKey.ProjectID,
			BucketName: cacheKey.BucketName,
			Action:     cacheKey.Action,
			Inline:     cacheData.Inline,
			Allocated:  cacheData.Allocated,
		})
	}

	// The caller's context may already be cancelled (e.g. the request that
	// filled the batch has finished); the write must not depend on it.
	writeCtx := detachedFlushContext{Context: ctx}
	err := cache.DB.ExecuteInTx(writeCtx, func(ctx context.Context, tx Transaction) error {
		return tx.UpdateBucketBandwidthBatch(ctx, latestTime, rollups)
	})
	if err != nil {
		cache.log.Error("MONEY LOST! Bucket bandwidth rollup batch flush failed.", zap.Error(err))
	}

	// Swap in a fresh fence under the lock, then wake everyone waiting on the
	// old one. The release happens whether or not the write succeeded.
	var completion *sync2.Fence
	cache.mu.Lock()
	cache.nextFlushCompletion, completion = new(sync2.Fence), cache.nextFlushCompletion
	cache.mu.Unlock()
	completion.Release()
}

// detachedFlushContext exposes the values of the wrapped context while
// reporting that it is never cancelled and has no deadline.
type detachedFlushContext struct{ context.Context }

// Deadline reports that no deadline is set.
func (detachedFlushContext) Deadline() (time.Time, bool) { return time.Time{}, false }

// Done returns a nil channel, which never becomes ready.
func (detachedFlushContext) Done() <-chan struct{} { return nil }

// Err always returns nil because the context is never cancelled.
func (detachedFlushContext) Err() error { return nil }
// updateCacheValue adds allocated and inline to the totals cached under
// (projectID, bucketName, action) and advances latestTime when intervalStart
// is later than what has been seen so far. When the number of distinct cached
// keys reaches batchSize, the pending batch is detached, the cache is reset,
// and the batch is written to the database on a separate goroutine.
func (cache *RollupsWriteCache) updateCacheValue(ctx context.Context, projectID uuid.UUID, bucketName []byte, action pb.PieceAction, allocated, inline int64, intervalStart time.Time) {
	defer mon.Task()(&ctx)(nil)

	cache.mu.Lock()
	defer cache.mu.Unlock()

	if cache.latestTime.Before(intervalStart) {
		cache.latestTime = intervalStart
	}

	key := CacheKey{
		ProjectID:  projectID,
		BucketName: string(bucketName),
		Action:     action,
	}

	// A missing key yields the zero CacheData, so the additions below cover
	// both the first and any later update for this key.
	entry, seen := cache.pendingRollups[key]
	entry.Allocated += allocated
	entry.Inline += inline
	cache.pendingRollups[key] = entry
	if !seen {
		cache.currentSize++
	}

	if cache.currentSize >= cache.batchSize {
		// Batch is full: hand it off and start over with an empty cache.
		batch, batchTime, batchLen := cache.pendingRollups, cache.latestTime, cache.currentSize
		cache.pendingRollups, cache.latestTime, cache.currentSize = make(RollupData), time.Time{}, 0

		go cache.flushToDB(ctx, batch, batchTime, batchLen)
	}
}
// OnNextFlush returns a channel that is closed when the next flushToDB call
// finishes. The fence is read under the lock; its Done channel is obtained
// after the lock has been dropped.
func (cache *RollupsWriteCache) OnNextFlush() <-chan struct{} {
	pending := func() *sync2.Fence {
		cache.mu.Lock()
		defer cache.mu.Unlock()
		return cache.nextFlushCompletion
	}()
	return pending.Done()
}
// CurrentSize returns the current size of the cache, i.e. the number of
// distinct keys added since the cache was last reset.
func (cache *RollupsWriteCache) CurrentSize() int {
	cache.mu.Lock()
	size := cache.currentSize
	cache.mu.Unlock()
	return size
}
// CurrentData returns a copy of the pending rollups currently held in the
// cache. The copy is taken under the lock and is independent of the cache:
// later updates do not show up in it and modifying it does not affect the
// cache. An empty cache yields an empty, non-nil map.
func (cache *RollupsWriteCache) CurrentData() RollupData {
	cache.mu.Lock()
	defer cache.mu.Unlock()

	// The final size is known up front, so allocate the map once.
	snapshot := make(RollupData, len(cache.pendingRollups))
	for key, data := range cache.pendingRollups {
		snapshot[key] = data
	}
	return snapshot
}