5d016425f1
Change-Id: I555a479a89e0ddbf0499898bdbc8574282cd6846
74 lines
2.0 KiB
Go
74 lines
2.0 KiB
Go
// Copyright (C) 2019 Storj Labs, Inc.
|
|
// See LICENSE for copying information.
|
|
|
|
package downtime
|
|
|
|
import (
|
|
"context"
|
|
|
|
"go.uber.org/zap"
|
|
|
|
"storj.io/common/sync2"
|
|
"storj.io/storj/satellite/overlay"
|
|
)
|
|
|
|
// DetectionChore looks for nodes that have not checked in and tries to contact them.
|
|
//
|
|
// architecture: Chore
|
|
type DetectionChore struct {
|
|
log *zap.Logger
|
|
Loop *sync2.Cycle
|
|
config Config
|
|
overlay *overlay.Service
|
|
service *Service
|
|
}
|
|
|
|
// NewDetectionChore instantiates DetectionChore.
|
|
func NewDetectionChore(log *zap.Logger, config Config, overlay *overlay.Service, service *Service) *DetectionChore {
|
|
return &DetectionChore{
|
|
log: log,
|
|
Loop: sync2.NewCycle(config.DetectionInterval),
|
|
config: config,
|
|
overlay: overlay,
|
|
service: service,
|
|
}
|
|
}
|
|
|
|
// Run starts the chore.
|
|
func (chore *DetectionChore) Run(ctx context.Context) (err error) {
|
|
defer mon.Task()(&ctx)(&err)
|
|
return chore.Loop.Run(ctx, func(ctx context.Context) (err error) {
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
chore.log.Debug("checking for nodes that have not had a successful check-in within the interval.",
|
|
zap.Stringer("interval", chore.config.DetectionInterval))
|
|
|
|
nodeLastContacts, err := chore.overlay.GetSuccesfulNodesNotCheckedInSince(ctx, chore.config.DetectionInterval)
|
|
if err != nil {
|
|
chore.log.Error("error retrieving node addresses for downtime detection.", zap.Error(err))
|
|
return nil
|
|
}
|
|
chore.log.Debug("nodes that have had not had a successful check-in with the interval.",
|
|
zap.Stringer("interval", chore.config.DetectionInterval),
|
|
zap.Int("count", len(nodeLastContacts)))
|
|
|
|
for _, nodeLastContact := range nodeLastContacts {
|
|
success, err := chore.service.CheckAndUpdateNodeAvailability(ctx, nodeLastContact.URL)
|
|
if err != nil {
|
|
chore.log.Error("error during downtime detection ping back.",
|
|
zap.Bool("success", success),
|
|
zap.Error(err))
|
|
|
|
continue
|
|
}
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// Close closes chore.
|
|
func (chore *DetectionChore) Close() error {
|
|
chore.Loop.Close()
|
|
return nil
|
|
}
|