storagenode/monitor: add option to only log an error, instead of stopping the node, when the storage directory verification check fails
This is not recommended for most nodes; leaving your node running when it can't handle requests fast enough is a good way to fail audits and get disqualified, which may happen before you even know about the problem.

But some Windows users are finding that this is being triggered regularly on their nodes, and that it apparently causes the whole system to lock up occasionally. We are adding this option as a way to mitigate that problem until we can collect more information.

Change-Id: I7a652b0f9f970bbb9ed9f2cb3ad1cb89d90db8d7
This commit is contained in:
parent
4c05293d8b
commit
556250911c
@ -46,6 +46,7 @@ type Config struct {
|
||||
VerifyDirWritableInterval time.Duration `help:"how frequently to verify writability of storage directory" releaseDefault:"5m" devDefault:"30s"`
|
||||
VerifyDirReadableTimeout time.Duration `help:"how long to wait for a storage directory readability verification to complete" releaseDefault:"1m" devDefault:"10s"`
|
||||
VerifyDirWritableTimeout time.Duration `help:"how long to wait for a storage directory writability verification to complete" releaseDefault:"1m" devDefault:"10s"`
|
||||
VerifyDirWarnOnly bool `help:"if the storage directory verification check fails, log a warning instead of killing the node" default:"false"`
|
||||
MinimumDiskSpace memory.Size `help:"how much disk space a node at minimum has to advertise" default:"500GB"`
|
||||
MinimumBandwidth memory.Size `help:"how much bandwidth a node at minimum has to advertise (deprecated)" default:"0TB"`
|
||||
NotifyLowDiskCooldown time.Duration `help:"minimum length of time between capacity reports" default:"10m" hidden:"true"`
|
||||
@ -134,8 +135,16 @@ func (service *Service) Run(ctx context.Context) (err error) {
|
||||
err := service.store.VerifyStorageDirWithTimeout(ctx, service.contact.Local().ID, timeout)
|
||||
if err != nil {
|
||||
if errs.Is(err, context.DeadlineExceeded) {
|
||||
if service.Config.VerifyDirWarnOnly {
|
||||
service.log.Error("timed out while verifying readability of storage directory", zap.Duration("timeout", timeout))
|
||||
return nil
|
||||
}
|
||||
return Error.New("timed out after %v while verifying readability of storage directory", timeout)
|
||||
}
|
||||
if service.Config.VerifyDirWarnOnly {
|
||||
service.log.Error("error verifying location and/or readability of storage directory", zap.Error(err))
|
||||
return nil
|
||||
}
|
||||
return Error.New("error verifying location and/or readability of storage directory: %v", err)
|
||||
}
|
||||
return nil
|
||||
@ -147,8 +156,16 @@ func (service *Service) Run(ctx context.Context) (err error) {
|
||||
err := service.store.CheckWritabilityWithTimeout(ctx, timeout)
|
||||
if err != nil {
|
||||
if errs.Is(err, context.DeadlineExceeded) {
|
||||
if service.Config.VerifyDirWarnOnly {
|
||||
service.log.Error("timed out while verifying writability of storage directory", zap.Duration("timeout", timeout))
|
||||
return nil
|
||||
}
|
||||
return Error.New("timed out after %v while verifying writability of storage directory", timeout)
|
||||
}
|
||||
if service.Config.VerifyDirWarnOnly {
|
||||
service.log.Error("error verifying writability of storage directory", zap.Error(err))
|
||||
return nil
|
||||
}
|
||||
return Error.New("error verifying writability of storage directory: %v", err)
|
||||
}
|
||||
return nil
|
||||
|
Loading…
Reference in New Issue
Block a user