satellite/metainfo: adjust piecedeletion timeouts
Currently, slower storage nodes can slow down the deletion queue. To make piece deletion faster, reduce the maximum time spent in either dialing or piece deletion requests. With this change: * dial timeout is 3s * request timeout is 15s * fail threshold is set to 10min. Similarly to how canceled requests are handled, we'll also mark a storage node as failed when a timeout occurs. A timeout usually indicates that the storage node is overwhelmed. Garbage collection will ensure that the pieces get deleted eventually. Change-Id: Iec5de699f5917905f5807140e2c3252088c6399b
This commit is contained in:
parent
d3a0364f21
commit
edb8d656de
@ -5,6 +5,7 @@ package piecedeletion
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
@ -106,7 +107,7 @@ func (dialer *Dialer) Handle(ctx context.Context, node storj.NodeURL, queue Queu
|
||||
if err != nil {
|
||||
dialer.log.Debug("deletion request failed", zap.Stringer("id", node.ID), zap.Error(err))
|
||||
// don't try to send to this storage node a bit, when the deletion times out
|
||||
if errs2.IsCanceled(err) {
|
||||
if errs2.IsCanceled(err) || errors.Is(err, context.DeadlineExceeded) {
|
||||
dialer.markFailed(ctx, node)
|
||||
}
|
||||
break
|
||||
|
@ -25,9 +25,9 @@ type Config struct {
|
||||
MaxPiecesPerBatch int `help:"maximum number of pieces per batch" default:"5000" testDefault:"4000"`
|
||||
MaxPiecesPerRequest int `help:"maximum number pieces per single request" default:"1000" testDefault:"2000"`
|
||||
|
||||
DialTimeout time.Duration `help:"timeout for dialing nodes (0 means satellite default)" default:"0" testDefault:"2s"`
|
||||
FailThreshold time.Duration `help:"threshold for retrying a failed node" releaseDefault:"5m" devDefault:"2s"`
|
||||
RequestTimeout time.Duration `help:"timeout for a single delete request" releaseDefault:"1m" devDefault:"2s"`
|
||||
DialTimeout time.Duration `help:"timeout for dialing nodes (0 means satellite default)" default:"3s" testDefault:"2s"`
|
||||
FailThreshold time.Duration `help:"threshold for retrying a failed node" releaseDefault:"10m" devDefault:"2s"`
|
||||
RequestTimeout time.Duration `help:"timeout for a single delete request" releaseDefault:"15s" devDefault:"2s"`
|
||||
}
|
||||
|
||||
const (
|
||||
|
6
scripts/testdata/satellite-config.yaml.lock
vendored
6
scripts/testdata/satellite-config.yaml.lock
vendored
@ -404,10 +404,10 @@ identity.key-path: /root/.local/share/storj/identity/satellite/identity.key
|
||||
# metainfo.overlay: true
|
||||
|
||||
# timeout for dialing nodes (0 means satellite default)
|
||||
# metainfo.piece-deletion.dial-timeout: 0s
|
||||
# metainfo.piece-deletion.dial-timeout: 3s
|
||||
|
||||
# threshold for retrying a failed node
|
||||
# metainfo.piece-deletion.fail-threshold: 5m0s
|
||||
# metainfo.piece-deletion.fail-threshold: 10m0s
|
||||
|
||||
# maximum number of concurrent requests to storage nodes
|
||||
# metainfo.piece-deletion.max-concurrency: 100
|
||||
@ -422,7 +422,7 @@ identity.key-path: /root/.local/share/storj/identity/satellite/identity.key
|
||||
# metainfo.piece-deletion.max-pieces-per-request: 1000
|
||||
|
||||
# timeout for a single delete request
|
||||
# metainfo.piece-deletion.request-timeout: 1m0s
|
||||
# metainfo.piece-deletion.request-timeout: 15s
|
||||
|
||||
# max bucket count for a project.
|
||||
# metainfo.project-limits.max-buckets: 100
|
||||
|
Loading…
Reference in New Issue
Block a user