storj/storagenode/contact/chore.go
2020-12-05 17:01:42 +01:00

152 lines
3.8 KiB
Go

// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package contact
import (
"context"
"sync"
"time"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
"storj.io/common/storj"
"storj.io/common/sync2"
)
// Chore is the contact chore for nodes announcing themselves to their trusted satellites.
//
// architecture: Chore
type Chore struct {
log *zap.Logger
service *Service
mu sync.Mutex
cycles map[storj.NodeID]*sync2.Cycle
started sync2.Fence
interval time.Duration
}
// NewChore creates a new contact chore.
func NewChore(log *zap.Logger, interval time.Duration, service *Service) *Chore {
return &Chore{
log: log,
service: service,
cycles: make(map[storj.NodeID]*sync2.Cycle),
interval: interval,
}
}
// Run the contact chore on a regular interval with jitter.
func (chore *Chore) Run(ctx context.Context) (err error) {
defer mon.Task()(&ctx)(&err)
var group errgroup.Group
if !chore.service.initialized.Wait(ctx) {
return ctx.Err()
}
// configure the satellite ping cycles
chore.updateCycles(ctx, &group, chore.service.trust.GetSatellites(ctx))
// set up a cycle to update ping cycles on a frequent interval
refreshCycle := sync2.NewCycle(time.Minute)
refreshCycle.Start(ctx, &group, func(ctx context.Context) error {
chore.updateCycles(ctx, &group, chore.service.trust.GetSatellites(ctx))
return nil
})
defer refreshCycle.Close()
chore.started.Release()
return group.Wait()
}
func (chore *Chore) updateCycles(ctx context.Context, group *errgroup.Group, satellites []storj.NodeID) {
chore.mu.Lock()
defer chore.mu.Unlock()
trustedIDs := make(map[storj.NodeID]struct{})
for _, satellite := range satellites {
satellite := satellite // alias the loop var since it is captured below
trustedIDs[satellite] = struct{}{}
if _, ok := chore.cycles[satellite]; ok {
// Ping cycle has already been started for this satellite
continue
}
// Set up a new ping cycle for the newly trusted satellite
chore.log.Debug("Starting cycle", zap.Stringer("Satellite ID", satellite))
cycle := sync2.NewCycle(chore.interval)
chore.cycles[satellite] = cycle
cycle.Start(ctx, group, func(ctx context.Context) error {
return chore.service.pingSatellite(ctx, satellite, chore.interval)
})
}
// Stop the ping cycle for satellites that are no longer trusted
for satellite, cycle := range chore.cycles {
if _, ok := trustedIDs[satellite]; !ok {
chore.log.Debug("Stopping cycle", zap.Stringer("Satellite ID", satellite))
cycle.Close()
delete(chore.cycles, satellite)
}
}
}
// Pause stops all the cycles in the contact chore.
func (chore *Chore) Pause(ctx context.Context) {
chore.started.Wait(ctx)
chore.mu.Lock()
defer chore.mu.Unlock()
for _, cycle := range chore.cycles {
cycle.Pause()
}
}
// Trigger ensures that each cycle is done at least once.
// If the cycle is currently running it waits for the previous to complete and then runs.
func (chore *Chore) Trigger(ctx context.Context) {
chore.started.Wait(ctx)
chore.mu.Lock()
defer chore.mu.Unlock()
for _, cycle := range chore.cycles {
cycle := cycle
go func() {
cycle.Trigger()
}()
}
}
// TriggerWait ensures that each cycle is done at least once and waits for completion.
// If the cycle is currently running it waits for the previous to complete and then runs.
func (chore *Chore) TriggerWait(ctx context.Context) {
chore.started.Wait(ctx)
chore.mu.Lock()
defer chore.mu.Unlock()
var group errgroup.Group
for _, cycle := range chore.cycles {
cycle := cycle
group.Go(func() error {
cycle.TriggerWait()
return nil
})
}
_ = group.Wait() // goroutines aren't returning any errors
}
// Close stops all the cycles in the contact chore.
func (chore *Chore) Close() error {
chore.mu.Lock()
defer chore.mu.Unlock()
for _, cycle := range chore.cycles {
cycle.Close()
}
chore.cycles = nil
return nil
}