satellite/overlay/config.go: Add AuditHistoryConfig to overlay

Adds AuditHistory{WindowSize, TrackingPeriod, GracePeriod,
OfflineThreshold}. These values will be used to track offline audits over
time, and to suspend/disqualify nodes for being offline for too long.

Change-Id: I05f7dbc3c034bdc53c4fbd7719c71a44f37ec6a5
This commit is contained in:
Moby von Briesen 2020-07-24 17:21:25 -04:00 committed by Maximillian von Briesen
parent 5dfe27f175
commit e02adfe5e9
3 changed files with 28 additions and 0 deletions

View File

@ -415,6 +415,12 @@ func (planet *Planet) newSatellites(count int, satelliteDatabases satellitedbtes
Staleness: 3 * time.Minute,
},
UpdateStatsBatchSize: 100,
AuditHistory: overlay.AuditHistoryConfig{
WindowSize: 10 * time.Minute,
TrackingPeriod: time.Hour,
GracePeriod: time.Hour,
OfflineThreshold: 0.6,
},
},
Metainfo: metainfo.Config{
DatabaseURL: "", // not used

View File

@ -23,6 +23,7 @@ type Config struct {
Node NodeSelectionConfig
NodeSelectionCache CacheConfig
UpdateStatsBatchSize int `help:"number of update requests to process per transaction" default:"100"`
AuditHistory AuditHistoryConfig
}
// NodeSelectionConfig is a configuration struct to determine the minimum
@ -44,3 +45,12 @@ type NodeSelectionConfig struct {
SuspensionGracePeriod time.Duration `help:"the time period that must pass before suspended nodes will be disqualified" releaseDefault:"168h" devDefault:"1h"`
SuspensionDQEnabled bool `help:"whether nodes will be disqualified if they have been suspended for longer than the suspended grace period" releaseDefault:"false" devDefault:"true"`
}
// AuditHistoryConfig is a configuration struct defining time periods and thresholds for penalizing nodes for being offline.
// It is used for downtime suspension and disqualification.
//
// Audit results are grouped into fixed-length windows; the windows that fall
// within the tracking period are averaged into an online score, which is
// compared against OfflineThreshold.
type AuditHistoryConfig struct {
// WindowSize is the length of time spanned by a single audit window
// (release default 12h, dev default 5m).
WindowSize time.Duration `help:"The length of time spanning a single audit window" releaseDefault:"12h" devDefault:"5m"`
// TrackingPeriod is the length of time over which audit windows are tracked
// for node suspension and disqualification (release default 720h, dev default 1h).
TrackingPeriod time.Duration `help:"The length of time to track audit windows for node suspension and disqualification" releaseDefault:"720h" devDefault:"1h"`
// GracePeriod is the length of time a suspended storage node operator is given
// to diagnose and fix the cause of downtime. After it elapses, the node has one
// tracking period to reach the minimum online score before disqualification
// (release default 168h, dev default 1h).
GracePeriod time.Duration `help:"The length of time to give suspended SNOs to diagnose and fix issues causing downtime. Afterwards, they will have one tracking period to reach the minimum online score before disqualification" releaseDefault:"168h" devDefault:"1h"`
// OfflineThreshold is the online score below which a node is punished for
// offline audits. The score is the ratio of online to total audits in each
// window, averaged across the windows within the tracking period (default 0.6).
OfflineThreshold float64 `help:"The point below which a node is punished for offline audits. Determined by calculating the ratio of online/total audits within each window and finding the average across windows within the tracking period." default:"0.6"`
}

12
scripts/testdata/satellite-config.yaml.lock vendored Normal file → Executable file
View File

@ -454,6 +454,18 @@ identity.key-path: /root/.local/share/storj/identity/satellite/identity.key
# rollout phase for the windowed endpoint
# orders.window-endpoint-rollout-phase: phase1
# The length of time to give suspended SNOs to diagnose and fix issues causing downtime. Afterwards, they will have one tracking period to reach the minimum online score before disqualification
# overlay.audit-history.grace-period: 168h0m0s
# The point below which a node is punished for offline audits. Determined by calculating the ratio of online/total audits within each window and finding the average across windows within the tracking period.
# overlay.audit-history.offline-threshold: 0.6
# The length of time to track audit windows for node suspension and disqualification
# overlay.audit-history.tracking-period: 720h0m0s
# The length of time spanning a single audit window
# overlay.audit-history.window-size: 12h0m0s
# disable node cache
# overlay.node-selection-cache.disabled: false