satellite/overlay/config.go: Add AuditHistoryConfig to overlay
Adds the AuditHistory config group (WindowSize, TrackingPeriod, GracePeriod, OfflineThreshold) to the overlay config. These values will be used to track offline audits over time, and to suspend or disqualify nodes that are offline for too long.
Change-Id: I05f7dbc3c034bdc53c4fbd7719c71a44f37ec6a5
This commit is contained in:
parent
5dfe27f175
commit
e02adfe5e9
@ -415,6 +415,12 @@ func (planet *Planet) newSatellites(count int, satelliteDatabases satellitedbtes
|
||||
Staleness: 3 * time.Minute,
|
||||
},
|
||||
UpdateStatsBatchSize: 100,
|
||||
AuditHistory: overlay.AuditHistoryConfig{
|
||||
WindowSize: 10 * time.Minute,
|
||||
TrackingPeriod: time.Hour,
|
||||
GracePeriod: time.Hour,
|
||||
OfflineThreshold: 0.6,
|
||||
},
|
||||
},
|
||||
Metainfo: metainfo.Config{
|
||||
DatabaseURL: "", // not used
|
||||
|
@ -23,6 +23,7 @@ type Config struct {
|
||||
Node NodeSelectionConfig
|
||||
NodeSelectionCache CacheConfig
|
||||
UpdateStatsBatchSize int `help:"number of update requests to process per transaction" default:"100"`
|
||||
AuditHistory AuditHistoryConfig
|
||||
}
|
||||
|
||||
// NodeSelectionConfig is a configuration struct to determine the minimum
|
||||
@ -44,3 +45,12 @@ type NodeSelectionConfig struct {
|
||||
SuspensionGracePeriod time.Duration `help:"the time period that must pass before suspended nodes will be disqualified" releaseDefault:"168h" devDefault:"1h"`
|
||||
SuspensionDQEnabled bool `help:"whether nodes will be disqualified if they have been suspended for longer than the suspended grace period" releaseDefault:"false" devDefault:"true"`
|
||||
}
|
||||
|
||||
// AuditHistoryConfig is a configuration struct defining time periods and thresholds for penalizing nodes for being offline.
|
||||
// It is used for downtime suspension and disqualification.
// The help text and release defaults in the tags below also appear verbatim in
// scripts/testdata/satellite-config.yaml.lock, so the two presumably have to be
// kept in sync when a tag is edited.
|
||||
type AuditHistoryConfig struct {
|
||||
// WindowSize is the duration covered by a single audit window
// (release default 12h, dev default 5m).
WindowSize time.Duration `help:"The length of time spanning a single audit window" releaseDefault:"12h" devDefault:"5m"`
|
||||
// TrackingPeriod is how long audit windows are tracked when deciding on
// node suspension and disqualification (release default 720h, dev default 1h).
TrackingPeriod time.Duration `help:"The length of time to track audit windows for node suspension and disqualification" releaseDefault:"720h" devDefault:"1h"`
|
||||
// GracePeriod is the time a suspended storage node operator is given to fix
// the cause of downtime; per the help text, one further tracking period
// follows before disqualification (release default 168h, dev default 1h).
GracePeriod time.Duration `help:"The length of time to give suspended SNOs to diagnose and fix issues causing downtime. Afterwards, they will have one tracking period to reach the minimum online score before disqualification" releaseDefault:"168h" devDefault:"1h"`
|
||||
// OfflineThreshold is the score below which a node is penalized. Per the
// help text, the score is the online/total audit ratio of each window,
// averaged across the windows in the tracking period (default 0.6).
OfflineThreshold float64 `help:"The point below which a node is punished for offline audits. Determined by calculating the ratio of online/total audits within each window and finding the average across windows within the tracking period." default:"0.6"`
|
||||
}
|
||||
|
12
scripts/testdata/satellite-config.yaml.lock
vendored
Normal file → Executable file
12
scripts/testdata/satellite-config.yaml.lock
vendored
Normal file → Executable file
@ -454,6 +454,18 @@ identity.key-path: /root/.local/share/storj/identity/satellite/identity.key
|
||||
# rollout phase for the windowed endpoint
|
||||
# orders.window-endpoint-rollout-phase: phase1
|
||||
|
||||
# The length of time to give suspended SNOs to diagnose and fix issues causing downtime. Afterwards, they will have one tracking period to reach the minimum online score before disqualification
|
||||
# overlay.audit-history.grace-period: 168h0m0s
|
||||
|
||||
# The point below which a node is punished for offline audits. Determined by calculating the ratio of online/total audits within each window and finding the average across windows within the tracking period.
|
||||
# overlay.audit-history.offline-threshold: 0.6
|
||||
|
||||
# The length of time to track audit windows for node suspension and disqualification
|
||||
# overlay.audit-history.tracking-period: 720h0m0s
|
||||
|
||||
# The length of time spanning a single audit window
|
||||
# overlay.audit-history.window-size: 12h0m0s
|
||||
|
||||
# disable node cache
|
||||
# overlay.node-selection-cache.disabled: false
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user