storj/satellite/gracefulexit/common.go
Yingrong Zhao fa1ac24e19
satellite/gracefulexit: add failure threshold check (#3329)
* add overall failure percentage check and inactive time frame check before sending a response to sno

* update comment

* delete node from transfer queue if it has been inactive for too long

* fix linting error

* add test config value

* fix nil pointer

* add config value into testplanet

* add unit test for overall failure threshold

* move timeframe threshold to chore

* update protolock

* add chore test

* add per piece failure count logic

* change config name from EndpointMaxFailures to MaxFailuresPerPiece

* address comments

* fix linting error

* add error handling for no row returned from progress table

* fix test for graceful exit chore on storagenode

* fix typo InActive -> Inactive

* improve readability for failure threshold calculation

* update config lock

* change error handling for GetProgress in graceful exit endpoint on the satellite side

* return proper rpc error in endpoint

* add check in chore test for checking finish timestamp and queue
2019-10-24 12:24:42 -04:00

35 lines
1.2 KiB
Go

// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package gracefulexit
import (
"time"
"github.com/zeebo/errs"
"gopkg.in/spacemonkeygo/monkit.v2"
)
var (
// Error is the default error class for graceful exit package.
Error = errs.Class("gracefulexit")
// ErrNodeNotFound is returned if a graceful exit entry for a node does not exist in database
ErrNodeNotFound = errs.Class("graceful exit node not found")
mon = monkit.Package()
)
// Config holds the tunable settings for graceful exit: batch sizes and the
// run interval for the transfer queue chore, the endpoint batch size, and the
// failure and inactivity limits applied to exiting nodes.
type Config struct {
	// ChoreBatchSize is the buffer size used when batching inserts into the
	// transfer queue.
	ChoreBatchSize int `help:"size of the buffer used to batch inserts into the transfer queue." default:"500"`
	// ChoreInterval is the period between runs of the transfer queue chore.
	ChoreInterval time.Duration `help:"how often to run the transfer queue chore." releaseDefault:"30s" devDefault:"10s"`

	// EndpointBatchSize is the buffer size used when batching transfer queue
	// reads and sends to the storage node.
	EndpointBatchSize int `help:"size of the buffer used to batch transfer queue reads and sends to the storage node." default:"100"`

	// MaxFailuresPerPiece caps the number of transfer failures for a single
	// piece.
	MaxFailuresPerPiece int `help:"maximum number of transfer failures per piece." default:"3"`
	// OverallMaxFailuresPercentage caps the percentage of failed transfers for
	// a node.
	// TODO: the right default for this percentage is still undecided.
	OverallMaxFailuresPercentage int `help:"maximum percentage of transfer failures per node." default:"10"`
	// MaxInactiveTimeFrame is the longest period a node may go without
	// transfer activity.
	MaxInactiveTimeFrame time.Duration `help:"maximum inactive time frame of transfer activities per node." default:"500h"`
}