satellite/satellitedb: batch delete storage node tallies
Currently we have a significant number of tallies that need to be deleted together. Add a limit (by default 10k) to how many will be deleted at the same time. Change-Id: If530383f19b4d3bb83ed5fe956610a2e52f130a1
This commit is contained in:
parent
a9de5ce6c3
commit
82fede2132
@ -188,7 +188,7 @@ type StoragenodeAccounting interface {
|
|||||||
// QueryStorageNodeUsage returns slice of StorageNodeUsage for given period
|
// QueryStorageNodeUsage returns slice of StorageNodeUsage for given period
|
||||||
QueryStorageNodeUsage(ctx context.Context, nodeID storj.NodeID, start time.Time, end time.Time) ([]StorageNodeUsage, error)
|
QueryStorageNodeUsage(ctx context.Context, nodeID storj.NodeID, start time.Time, end time.Time) ([]StorageNodeUsage, error)
|
||||||
// DeleteTalliesBefore deletes all tallies prior to some time
|
// DeleteTalliesBefore deletes all tallies prior to some time
|
||||||
DeleteTalliesBefore(ctx context.Context, latestRollup time.Time) error
|
DeleteTalliesBefore(ctx context.Context, latestRollup time.Time, batchSize int) error
|
||||||
// ArchiveRollupsBefore archives rollups older than a given time and returns num storagenode and bucket bandwidth rollups archived.
|
// ArchiveRollupsBefore archives rollups older than a given time and returns num storagenode and bucket bandwidth rollups archived.
|
||||||
ArchiveRollupsBefore(ctx context.Context, before time.Time, batchSize int) (numArchivedNodeBW int, err error)
|
ArchiveRollupsBefore(ctx context.Context, before time.Time, batchSize int) (numArchivedNodeBW int, err error)
|
||||||
// GetRollupsSince retrieves all archived bandwidth rollup records since a given time. A hard limit batch size is used for results.
|
// GetRollupsSince retrieves all archived bandwidth rollup records since a given time. A hard limit batch size is used for results.
|
||||||
|
@ -17,29 +17,32 @@ import (
|
|||||||
|
|
||||||
// Config contains configurable values for rollup.
|
// Config contains configurable values for rollup.
|
||||||
type Config struct {
|
type Config struct {
|
||||||
Interval time.Duration `help:"how frequently rollup should run" releaseDefault:"24h" devDefault:"120s" testDefault:"$TESTINTERVAL"`
|
Interval time.Duration `help:"how frequently rollup should run" releaseDefault:"24h" devDefault:"120s" testDefault:"$TESTINTERVAL"`
|
||||||
DeleteTallies bool `help:"option for deleting tallies after they are rolled up" default:"true"`
|
DeleteTallies bool `help:"option for deleting tallies after they are rolled up" default:"true"`
|
||||||
|
DeleteTalliesBatchSize int `help:"how many tallies to delete in a batch" default:"10000"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Service is the rollup service for totalling data on storage nodes on daily intervals.
|
// Service is the rollup service for totalling data on storage nodes on daily intervals.
|
||||||
//
|
//
|
||||||
// architecture: Chore
|
// architecture: Chore
|
||||||
type Service struct {
|
type Service struct {
|
||||||
logger *zap.Logger
|
logger *zap.Logger
|
||||||
Loop *sync2.Cycle
|
Loop *sync2.Cycle
|
||||||
sdb accounting.StoragenodeAccounting
|
sdb accounting.StoragenodeAccounting
|
||||||
deleteTallies bool
|
deleteTallies bool
|
||||||
OrderExpiration time.Duration
|
deleteTalliesBatchSize int
|
||||||
|
OrderExpiration time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
// New creates a new rollup service.
|
// New creates a new rollup service.
|
||||||
func New(logger *zap.Logger, sdb accounting.StoragenodeAccounting, interval time.Duration, deleteTallies bool, orderExpiration time.Duration) *Service {
|
func New(logger *zap.Logger, sdb accounting.StoragenodeAccounting, config Config, orderExpiration time.Duration) *Service {
|
||||||
return &Service{
|
return &Service{
|
||||||
logger: logger,
|
logger: logger,
|
||||||
Loop: sync2.NewCycle(interval),
|
Loop: sync2.NewCycle(config.Interval),
|
||||||
sdb: sdb,
|
sdb: sdb,
|
||||||
deleteTallies: deleteTallies,
|
deleteTallies: config.DeleteTallies,
|
||||||
OrderExpiration: orderExpiration,
|
deleteTalliesBatchSize: config.DeleteTalliesBatchSize,
|
||||||
|
OrderExpiration: orderExpiration,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -101,7 +104,7 @@ func (r *Service) Rollup(ctx context.Context) (err error) {
|
|||||||
if r.deleteTallies {
|
if r.deleteTallies {
|
||||||
// Delete already rolled up tallies
|
// Delete already rolled up tallies
|
||||||
latestTally = latestTally.Add(-r.OrderExpiration)
|
latestTally = latestTally.Add(-r.OrderExpiration)
|
||||||
err = r.sdb.DeleteTalliesBefore(ctx, latestTally)
|
err = r.sdb.DeleteTalliesBefore(ctx, latestTally, r.deleteTalliesBatchSize)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Error.Wrap(err)
|
return Error.Wrap(err)
|
||||||
}
|
}
|
||||||
|
@ -46,7 +46,7 @@ func TestRollupNoDeletes(t *testing.T) {
|
|||||||
storageNodes = createNodes(ctx, t, db)
|
storageNodes = createNodes(ctx, t, db)
|
||||||
)
|
)
|
||||||
|
|
||||||
rollupService := rollup.New(testplanet.NewLogger(t), snAccountingDB, 120*time.Second, false, time.Hour)
|
rollupService := rollup.New(testplanet.NewLogger(t), snAccountingDB, rollup.Config{Interval: 120 * time.Second}, time.Hour)
|
||||||
|
|
||||||
// disqualifying nodes is unrelated to this test, but it is added here
|
// disqualifying nodes is unrelated to this test, but it is added here
|
||||||
// to confirm the disqualification shows up in the accounting CSVRow
|
// to confirm the disqualification shows up in the accounting CSVRow
|
||||||
@ -147,7 +147,7 @@ func TestRollupDeletes(t *testing.T) {
|
|||||||
storageNodes = createNodes(ctx, t, db)
|
storageNodes = createNodes(ctx, t, db)
|
||||||
)
|
)
|
||||||
|
|
||||||
rollupService := rollup.New(testplanet.NewLogger(t), snAccountingDB, 120*time.Second, true, time.Hour)
|
rollupService := rollup.New(testplanet.NewLogger(t), snAccountingDB, rollup.Config{Interval: 120 * time.Second, DeleteTallies: true}, time.Hour)
|
||||||
|
|
||||||
// disqualifying nodes is unrelated to this test, but it is added here
|
// disqualifying nodes is unrelated to this test, but it is added here
|
||||||
// to confirm the disqualification shows up in the accounting CSVRow
|
// to confirm the disqualification shows up in the accounting CSVRow
|
||||||
|
@ -17,7 +17,6 @@ import (
|
|||||||
"storj.io/common/testrand"
|
"storj.io/common/testrand"
|
||||||
"storj.io/common/uuid"
|
"storj.io/common/uuid"
|
||||||
"storj.io/storj/private/testplanet"
|
"storj.io/storj/private/testplanet"
|
||||||
"storj.io/storj/private/teststorj"
|
|
||||||
"storj.io/storj/satellite/accounting"
|
"storj.io/storj/satellite/accounting"
|
||||||
"storj.io/storj/satellite/accounting/tally"
|
"storj.io/storj/satellite/accounting/tally"
|
||||||
"storj.io/storj/satellite/metabase"
|
"storj.io/storj/satellite/metabase"
|
||||||
@ -30,7 +29,7 @@ func TestDeleteTalliesBefore(t *testing.T) {
|
|||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
eraseBefore: time.Now(),
|
eraseBefore: time.Now(),
|
||||||
expectedRaws: 1,
|
expectedRaws: 3,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
eraseBefore: time.Now().Add(24 * time.Hour),
|
eraseBefore: time.Now().Add(24 * time.Hour),
|
||||||
@ -43,14 +42,15 @@ func TestDeleteTalliesBefore(t *testing.T) {
|
|||||||
testplanet.Run(t, testplanet.Config{
|
testplanet.Run(t, testplanet.Config{
|
||||||
SatelliteCount: 1, StorageNodeCount: 0, UplinkCount: 0,
|
SatelliteCount: 1, StorageNodeCount: 0, UplinkCount: 0,
|
||||||
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
||||||
id := teststorj.NodeIDFromBytes([]byte{})
|
|
||||||
nodeData := make(map[storj.NodeID]float64)
|
nodeData := make(map[storj.NodeID]float64)
|
||||||
nodeData[id] = float64(1000)
|
nodeData[storj.NodeID{1}] = float64(1000)
|
||||||
|
nodeData[storj.NodeID{2}] = float64(1000)
|
||||||
|
nodeData[storj.NodeID{3}] = float64(1000)
|
||||||
|
|
||||||
err := planet.Satellites[0].DB.StoragenodeAccounting().SaveTallies(ctx, time.Now(), nodeData)
|
err := planet.Satellites[0].DB.StoragenodeAccounting().SaveTallies(ctx, time.Now(), nodeData)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
err = planet.Satellites[0].DB.StoragenodeAccounting().DeleteTalliesBefore(ctx, test.eraseBefore)
|
err = planet.Satellites[0].DB.StoragenodeAccounting().DeleteTalliesBefore(ctx, test.eraseBefore, 1)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
raws, err := planet.Satellites[0].DB.StoragenodeAccounting().GetTallies(ctx)
|
raws, err := planet.Satellites[0].DB.StoragenodeAccounting().GetTallies(ctx)
|
||||||
|
@ -472,7 +472,7 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB,
|
|||||||
|
|
||||||
// Lets add 1 more day so we catch any off by one errors when deleting tallies
|
// Lets add 1 more day so we catch any off by one errors when deleting tallies
|
||||||
orderExpirationPlusDay := config.Orders.Expiration + config.Rollup.Interval
|
orderExpirationPlusDay := config.Orders.Expiration + config.Rollup.Interval
|
||||||
peer.Accounting.Rollup = rollup.New(peer.Log.Named("accounting:rollup"), peer.DB.StoragenodeAccounting(), config.Rollup.Interval, config.Rollup.DeleteTallies, orderExpirationPlusDay)
|
peer.Accounting.Rollup = rollup.New(peer.Log.Named("accounting:rollup"), peer.DB.StoragenodeAccounting(), config.Rollup, orderExpirationPlusDay)
|
||||||
peer.Services.Add(lifecycle.Item{
|
peer.Services.Add(lifecycle.Item{
|
||||||
Name: "accounting:rollup",
|
Name: "accounting:rollup",
|
||||||
Run: peer.Accounting.Rollup.Run,
|
Run: peer.Accounting.Rollup.Run,
|
||||||
|
@ -519,11 +519,52 @@ func (db *StoragenodeAccounting) QueryStorageNodeUsage(ctx context.Context, node
|
|||||||
}
|
}
|
||||||
|
|
||||||
// DeleteTalliesBefore deletes all raw tallies prior to some time.
|
// DeleteTalliesBefore deletes all raw tallies prior to some time.
|
||||||
func (db *StoragenodeAccounting) DeleteTalliesBefore(ctx context.Context, latestRollup time.Time) (err error) {
|
func (db *StoragenodeAccounting) DeleteTalliesBefore(ctx context.Context, latestRollup time.Time, batchSize int) (err error) {
|
||||||
defer mon.Task()(&ctx)(&err)
|
defer mon.Task()(&ctx)(&err)
|
||||||
deleteRawSQL := `DELETE FROM storagenode_storage_tallies WHERE interval_end_time < ?`
|
|
||||||
_, err = db.db.DB.ExecContext(ctx, db.db.Rebind(deleteRawSQL), latestRollup)
|
if batchSize <= 0 {
|
||||||
return err
|
batchSize = 10000
|
||||||
|
}
|
||||||
|
|
||||||
|
var query string
|
||||||
|
switch db.db.impl {
|
||||||
|
case dbutil.Cockroach:
|
||||||
|
query = `
|
||||||
|
DELETE FROM storagenode_storage_tallies
|
||||||
|
WHERE interval_end_time < ?
|
||||||
|
LIMIT ?`
|
||||||
|
case dbutil.Postgres:
|
||||||
|
query = `
|
||||||
|
DELETE FROM storagenode_storage_tallies
|
||||||
|
WHERE ctid IN (
|
||||||
|
SELECT ctid
|
||||||
|
FROM storagenode_storage_tallies
|
||||||
|
WHERE interval_end_time < ?
|
||||||
|
ORDER BY interval_end_time
|
||||||
|
LIMIT ?
|
||||||
|
)`
|
||||||
|
default:
|
||||||
|
return Error.New("unsupported database: %v", db.db.impl)
|
||||||
|
}
|
||||||
|
query = db.db.Rebind(query)
|
||||||
|
|
||||||
|
for {
|
||||||
|
res, err := db.db.DB.ExecContext(ctx, query, latestRollup, batchSize)
|
||||||
|
if err != nil {
|
||||||
|
if errs.Is(err, sql.ErrNoRows) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return Error.Wrap(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
affected, err := res.RowsAffected()
|
||||||
|
if err != nil {
|
||||||
|
return Error.Wrap(err)
|
||||||
|
}
|
||||||
|
if affected == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ArchiveRollupsBefore archives rollups older than a given time.
|
// ArchiveRollupsBefore archives rollups older than a given time.
|
||||||
|
3
scripts/testdata/satellite-config.yaml.lock
vendored
3
scripts/testdata/satellite-config.yaml.lock
vendored
@ -817,6 +817,9 @@ identity.key-path: /root/.local/share/storj/identity/satellite/identity.key
|
|||||||
# option for deleting tallies after they are rolled up
|
# option for deleting tallies after they are rolled up
|
||||||
# rollup.delete-tallies: true
|
# rollup.delete-tallies: true
|
||||||
|
|
||||||
|
# how many tallies to delete in a batch
|
||||||
|
# rollup.delete-tallies-batch-size: 10000
|
||||||
|
|
||||||
# how frequently rollup should run
|
# how frequently rollup should run
|
||||||
# rollup.interval: 24h0m0s
|
# rollup.interval: 24h0m0s
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user