7fb86617fc
Use the 'AS OF SYSTEM TIME' CockroachDB clause for the Graceful Exit (a.k.a. GE) queries that count and delete the GE queue items of nodes which have already exited the network. Split the subquery used for deleting all the transfer queue items of nodes which have exited when CRDB is used, and batch the queries, because CRDB, unlike Postgres, struggles when executing it as a single query. The new test added in this commit to verify the CRDB batch logic for deleting all the transfer queue items of the exited nodes revealed that the Enqueue method has to run in batches when CRDB is used; otherwise CRDB returns the error "driver: bad connection" when a big amount of items is passed to be enqueued. This error didn't happen with the current test implementation; it happened with an initial one that created a big amount of exited nodes and transfer queue items for those nodes. Change-Id: I6a099cdbc515a240596bc93141fea3182c2e50a9
45 lines
2.2 KiB
Go
45 lines
2.2 KiB
Go
// Copyright (C) 2019 Storj Labs, Inc.
|
|
// See LICENSE for copying information.
|
|
|
|
package gracefulexit
|
|
|
|
import (
|
|
"time"
|
|
|
|
"github.com/spacemonkeygo/monkit/v3"
|
|
"github.com/zeebo/errs"
|
|
)
|
|
|
|
var (
|
|
// Error is the default error class for graceful exit package.
|
|
Error = errs.Class("gracefulexit")
|
|
|
|
// ErrNodeNotFound is returned if a graceful exit entry for a node does not exist in database.
|
|
ErrNodeNotFound = errs.Class("graceful exit node not found")
|
|
|
|
// ErrAboveOptimalThreshold is returned if a graceful exit entry for a node has more pieces than required.
|
|
ErrAboveOptimalThreshold = errs.Class("segment has more pieces than required")
|
|
|
|
mon = monkit.Package()
|
|
)
|
|
|
|
// Config holds the graceful exit settings.
//
// Each field's struct tags carry its help text and its default value
// (or separate release/dev defaults). The tag strings are read at runtime,
// so they must not be edited for cosmetic reasons.
type Config struct {
	Enabled bool `help:"whether or not graceful exit is enabled on the satellite side." default:"true"`

	// Transfer queue chore settings.
	ChoreBatchSize int           `help:"size of the buffer used to batch inserts into the transfer queue." default:"500"`
	ChoreInterval  time.Duration `help:"how often to run the transfer queue chore." releaseDefault:"30s" devDefault:"10s"`

	// Batch size for transfer queue reads and sends to the storage node.
	EndpointBatchSize int `help:"size of the buffer used to batch transfer queue reads and sends to the storage node." default:"300"`

	// Failure thresholds, timeouts and node eligibility limits.
	MaxFailuresPerPiece          int           `help:"maximum number of transfer failures per piece." default:"5"`
	OverallMaxFailuresPercentage int           `help:"maximum percentage of transfer failures per node." default:"10"`
	MaxInactiveTimeFrame         time.Duration `help:"maximum inactive time frame of transfer activities per node." default:"168h"`
	RecvTimeout                  time.Duration `help:"the minimum duration for receiving a stream from a storage node before timing out" default:"2h"`
	MaxOrderLimitSendCount       int           `help:"maximum number of order limits a satellite sends to a node before marking piece transfer failed" default:"10"`
	NodeMinAgeInMonths           int           `help:"minimum age for a node on the network in order to initiate graceful exit" default:"6"`

	// CockroachDB-specific settings (see the help text of each field).
	AsOfSystemTimeInterval time.Duration `help:"interval for AS OF SYSTEM TIME clause (crdb specific) to read from db at a specific time in the past " default:"-10s"`
	TransferQueueBatchSize int           `help:"batch size (crdb specific) for deleting and adding items to the transfer queue" default:"1000"`
}
|