satellite/gracefulexit: increase performance and tolerate higher error rate
Graceful exit is very slow at the moment. Over the last couple of days we increased the batch size on Stefan's satellite to 1000, but as a side effect the error rate increased. With a batch size of 500 the error rate looks stable. This PR will increase the default batch size to 300. Graceful exit will still be painfully slow, but at least it will be a bit faster. At the same time, this PR also increases the number of errors we tolerate. We don't want to DQ slow storage nodes just because they didn't finish all 300 transfers in time. We want to give them more retries. Change-Id: I92e3f99e116d4988457d8b902a88e85ed1bcc1a7
This commit is contained in:
parent
37cf42a9ae
commit
76849558cb
@ -30,12 +30,12 @@ type Config struct {
|
||||
ChoreBatchSize int `help:"size of the buffer used to batch inserts into the transfer queue." default:"500"`
|
||||
ChoreInterval time.Duration `help:"how often to run the transfer queue chore." releaseDefault:"30s" devDefault:"10s"`
|
||||
|
||||
EndpointBatchSize int `help:"size of the buffer used to batch transfer queue reads and sends to the storage node." default:"100"`
|
||||
EndpointBatchSize int `help:"size of the buffer used to batch transfer queue reads and sends to the storage node." default:"300"`
|
||||
|
||||
MaxFailuresPerPiece int `help:"maximum number of transfer failures per piece." default:"3"`
|
||||
MaxFailuresPerPiece int `help:"maximum number of transfer failures per piece." default:"5"`
|
||||
OverallMaxFailuresPercentage int `help:"maximum percentage of transfer failures per node." default:"10"`
|
||||
MaxInactiveTimeFrame time.Duration `help:"maximum inactive time frame of transfer activities per node." default:"168h"`
|
||||
RecvTimeout time.Duration `help:"the minimum duration for receiving a stream from a storage node before timing out" default:"10m"`
|
||||
MaxOrderLimitSendCount int `help:"maximum number of order limits a satellite sends to a node before marking piece transfer failed" default:"5"`
|
||||
MaxOrderLimitSendCount int `help:"maximum number of order limits a satellite sends to a node before marking piece transfer failed" default:"10"`
|
||||
NodeMinAgeInMonths int `help:"minimum age for a node on the network in order to initiate graceful exit" default:"6"`
|
||||
}
|
||||
|
6
scripts/testdata/satellite-config.yaml.lock
vendored
6
scripts/testdata/satellite-config.yaml.lock
vendored
@ -146,16 +146,16 @@ contact.external-address: ""
|
||||
# graceful-exit.enabled: true
|
||||
|
||||
# size of the buffer used to batch transfer queue reads and sends to the storage node.
|
||||
# graceful-exit.endpoint-batch-size: 100
|
||||
# graceful-exit.endpoint-batch-size: 300
|
||||
|
||||
# maximum number of transfer failures per piece.
|
||||
# graceful-exit.max-failures-per-piece: 3
|
||||
# graceful-exit.max-failures-per-piece: 5
|
||||
|
||||
# maximum inactive time frame of transfer activities per node.
|
||||
# graceful-exit.max-inactive-time-frame: 168h0m0s
|
||||
|
||||
# maximum number of order limits a satellite sends to a node before marking piece transfer failed
|
||||
# graceful-exit.max-order-limit-send-count: 5
|
||||
# graceful-exit.max-order-limit-send-count: 10
|
||||
|
||||
# minimum age for a node on the network in order to initiate graceful exit
|
||||
# graceful-exit.node-min-age-in-months: 6
|
||||
|
Loading…
Reference in New Issue
Block a user