storj/satellite/downtime/detection_chore.go

// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.

package downtime

import (
	"context"

	"go.uber.org/zap"

	"storj.io/common/sync2"
	"storj.io/storj/satellite/overlay"
)

// DetectionChore looks for nodes that have not checked in and tries to contact them.
//
// On every cycle of Loop it asks the overlay service for nodes without a
// successful check-in within config.DetectionInterval and has the downtime
// service check the availability of each of them.
//
// architecture: Chore
type DetectionChore struct {
	log     *zap.Logger      // receives the chore's debug and error messages
	Loop    *sync2.Cycle     // cycle that Run executes and Close closes; created with config.DetectionInterval
	config  Config           // supplies DetectionInterval
	overlay *overlay.Service // queried for nodes lacking a recent successful check-in
	service *Service         // checks and updates the availability of each reported node
}

// NewDetectionChore builds a DetectionChore from its dependencies.
//
// The returned chore's Loop is a fresh sync2.Cycle created with
// config.DetectionInterval.
func NewDetectionChore(log *zap.Logger, config Config, overlay *overlay.Service, service *Service) *DetectionChore {
	chore := new(DetectionChore)
	chore.log = log
	chore.config = config
	chore.overlay = overlay
	chore.service = service
	chore.Loop = sync2.NewCycle(config.DetectionInterval)
	return chore
}

// Run starts the chore.
//
// It hands a per-cycle callback to Loop.Run and returns whatever Loop.Run
// returns. Each cycle asks the overlay service for the nodes that have not had
// a successful check-in within config.DetectionInterval and then has the
// downtime service check the availability of every node in that list.
//
// Failures inside a cycle are logged and never returned from the callback, so
// the callback itself always reports nil to Loop.Run.
func (chore *DetectionChore) Run(ctx context.Context) (err error) {
	defer mon.Task()(&ctx)(&err)
	return chore.Loop.Run(ctx, func(ctx context.Context) (err error) {
		defer mon.Task()(&ctx)(&err)

		chore.log.Debug("checking for nodes that have not had a successful check-in within the interval.",
			zap.Stringer("interval", chore.config.DetectionInterval))

		nodeLastContacts, err := chore.overlay.GetSuccesfulNodesNotCheckedInSince(ctx, chore.config.DetectionInterval)
		if err != nil {
			// Log and swallow the error: returning it would be handed to Loop.Run.
			chore.log.Error("error retrieving node addresses for downtime detection.", zap.Error(err))
			return nil
		}
		chore.log.Debug("nodes that have not had a successful check-in within the interval.",
			zap.Stringer("interval", chore.config.DetectionInterval),
			zap.Int("count", len(nodeLastContacts)))

		for _, nodeLastContact := range nodeLastContacts {
			// Once the context is done every remaining check would fail and
			// log an error for each node, so stop the cycle here. The callback
			// still returns nil, as it does on every other path.
			if ctx.Err() != nil {
				return nil
			}

			success, err := chore.service.CheckAndUpdateNodeAvailability(ctx, nodeLastContact.ID, nodeLastContact.Address)
			if err != nil {
				// A failed check for one node must not prevent checking the rest.
				chore.log.Error("error during downtime detection ping back.",
					zap.Bool("success", success),
					zap.Error(err))
			}
		}
		return nil
	})
}

// Close closes the chore by closing its Loop.
//
// It always returns nil. How Loop.Close interacts with a Run that is still in
// progress is defined by sync2.Cycle and is not visible in this file.
func (chore *DetectionChore) Close() error {
	chore.Loop.Close()
	return nil
}