satellite/metainfo: adjust piecedeletion timeouts

Currently, slower storage nodes can slow down the deletion queue.
To make piece deletion faster, reduce the maximum time spent on
dialing and on piece deletion requests.

With this change:
* dial timeout is 3s
* request timeout is 15s
* fail threshold is set to 10min

Similarly, we'll mark a storage node as failed when a timeout occurs.
A timeout usually indicates that the storage node is overwhelmed.
Garbage collection will ensure that the pieces get deleted eventually.

Change-Id: Iec5de699f5917905f5807140e2c3252088c6399b
This commit is contained in:
Egon Elbre 2021-10-28 13:37:01 +03:00
parent d3a0364f21
commit edb8d656de
3 changed files with 8 additions and 7 deletions

View File

@ -5,6 +5,7 @@ package piecedeletion
import (
"context"
"errors"
"strconv"
"sync"
"time"
@ -106,7 +107,7 @@ func (dialer *Dialer) Handle(ctx context.Context, node storj.NodeURL, queue Queu
if err != nil {
dialer.log.Debug("deletion request failed", zap.Stringer("id", node.ID), zap.Error(err))
// don't try to send to this storage node a bit, when the deletion times out
if errs2.IsCanceled(err) {
if errs2.IsCanceled(err) || errors.Is(err, context.DeadlineExceeded) {
dialer.markFailed(ctx, node)
}
break

View File

@ -25,9 +25,9 @@ type Config struct {
MaxPiecesPerBatch int `help:"maximum number of pieces per batch" default:"5000" testDefault:"4000"`
MaxPiecesPerRequest int `help:"maximum number pieces per single request" default:"1000" testDefault:"2000"`
DialTimeout time.Duration `help:"timeout for dialing nodes (0 means satellite default)" default:"0" testDefault:"2s"`
FailThreshold time.Duration `help:"threshold for retrying a failed node" releaseDefault:"5m" devDefault:"2s"`
RequestTimeout time.Duration `help:"timeout for a single delete request" releaseDefault:"1m" devDefault:"2s"`
DialTimeout time.Duration `help:"timeout for dialing nodes (0 means satellite default)" default:"3s" testDefault:"2s"`
FailThreshold time.Duration `help:"threshold for retrying a failed node" releaseDefault:"10m" devDefault:"2s"`
RequestTimeout time.Duration `help:"timeout for a single delete request" releaseDefault:"15s" devDefault:"2s"`
}
const (

View File

@ -404,10 +404,10 @@ identity.key-path: /root/.local/share/storj/identity/satellite/identity.key
# metainfo.overlay: true
# timeout for dialing nodes (0 means satellite default)
# metainfo.piece-deletion.dial-timeout: 0s
# metainfo.piece-deletion.dial-timeout: 3s
# threshold for retrying a failed node
# metainfo.piece-deletion.fail-threshold: 5m0s
# metainfo.piece-deletion.fail-threshold: 10m0s
# maximum number of concurrent requests to storage nodes
# metainfo.piece-deletion.max-concurrency: 100
@ -422,7 +422,7 @@ identity.key-path: /root/.local/share/storj/identity/satellite/identity.key
# metainfo.piece-deletion.max-pieces-per-request: 1000
# timeout for a single delete request
# metainfo.piece-deletion.request-timeout: 1m0s
# metainfo.piece-deletion.request-timeout: 15s
# max bucket count for a project.
# metainfo.project-limits.max-buckets: 100