2019-10-15 16:29:47 +01:00
|
|
|
// Copyright (C) 2019 Storj Labs, Inc.
|
|
|
|
// See LICENSE for copying information.
|
|
|
|
|
|
|
|
package gracefulexit
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"sync"
|
2022-08-01 13:00:23 +01:00
|
|
|
"time"
|
2019-10-15 16:29:47 +01:00
|
|
|
|
|
|
|
"go.uber.org/zap"
|
|
|
|
|
2019-12-27 11:48:47 +00:00
|
|
|
"storj.io/common/rpc"
|
|
|
|
"storj.io/common/sync2"
|
2020-10-20 20:58:54 +01:00
|
|
|
"storj.io/storj/storagenode/piecetransfer"
|
2019-10-15 16:29:47 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
// Chore checks for satellites that the node is exiting and creates a worker per satellite to complete the process.
|
|
|
|
//
|
|
|
|
// architecture: Chore
|
|
|
|
type Chore struct {
|
2020-10-20 20:58:54 +01:00
|
|
|
log *zap.Logger
|
|
|
|
dialer rpc.Dialer
|
2019-10-15 16:29:47 +01:00
|
|
|
config Config
|
|
|
|
|
2022-08-02 12:12:01 +01:00
|
|
|
service *Service
|
2020-10-20 20:58:54 +01:00
|
|
|
transferService piecetransfer.Service
|
|
|
|
|
2019-10-15 16:29:47 +01:00
|
|
|
exitingMap sync.Map
|
2020-01-29 15:37:50 +00:00
|
|
|
Loop *sync2.Cycle
|
|
|
|
limiter *sync2.Limiter
|
2019-10-15 16:29:47 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewChore instantiates Chore.
|
2022-08-02 12:12:01 +01:00
|
|
|
func NewChore(log *zap.Logger, service *Service, transferService piecetransfer.Service, dialer rpc.Dialer, config Config) *Chore {
|
2019-10-15 16:29:47 +01:00
|
|
|
return &Chore{
|
2020-10-20 20:58:54 +01:00
|
|
|
log: log,
|
|
|
|
dialer: dialer,
|
|
|
|
service: service,
|
|
|
|
transferService: transferService,
|
|
|
|
config: config,
|
|
|
|
Loop: sync2.NewCycle(config.ChoreInterval),
|
|
|
|
limiter: sync2.NewLimiter(config.NumWorkers),
|
2019-10-15 16:29:47 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Run starts the chore.
|
|
|
|
func (chore *Chore) Run(ctx context.Context) (err error) {
|
|
|
|
defer mon.Task()(&ctx)(&err)
|
2022-08-01 13:00:23 +01:00
|
|
|
defer chore.limiter.Wait()
|
|
|
|
return chore.Loop.Run(ctx, chore.AddMissing)
|
|
|
|
}
|
2019-10-15 16:29:47 +01:00
|
|
|
|
2022-08-01 13:00:23 +01:00
|
|
|
// AddMissing starts any missing satellite chore.
|
|
|
|
func (chore *Chore) AddMissing(ctx context.Context) (err error) {
|
|
|
|
defer mon.Task()(&ctx)(&err)
|
2019-10-15 16:29:47 +01:00
|
|
|
|
2022-08-01 13:00:23 +01:00
|
|
|
geSatellites, err := chore.service.ListPendingExits(ctx)
|
|
|
|
if err != nil {
|
|
|
|
chore.log.Error("error retrieving satellites.", zap.Error(err))
|
|
|
|
return nil
|
|
|
|
}
|
2020-05-19 17:11:30 +01:00
|
|
|
|
2022-08-01 13:00:23 +01:00
|
|
|
if len(geSatellites) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
chore.log.Debug("exiting", zap.Int("satellites", len(geSatellites)))
|
2019-10-15 16:29:47 +01:00
|
|
|
|
2022-08-01 13:00:23 +01:00
|
|
|
for _, satellite := range geSatellites {
|
|
|
|
mon.Meter("satellite_gracefulexit_request").Mark(1) //mon:locked
|
|
|
|
satellite := satellite
|
2020-01-08 02:33:41 +00:00
|
|
|
|
2022-08-01 13:00:23 +01:00
|
|
|
worker := NewWorker(chore.log, chore.service, chore.transferService, chore.dialer, satellite.NodeURL, chore.config)
|
|
|
|
if _, ok := chore.exitingMap.LoadOrStore(satellite.SatelliteID, worker); ok {
|
|
|
|
// already running a worker for this satellite
|
|
|
|
chore.log.Debug("skipping for satellite, worker already exists.", zap.Stringer("Satellite ID", satellite.SatelliteID))
|
|
|
|
continue
|
|
|
|
}
|
2019-12-17 15:06:47 +00:00
|
|
|
|
2022-08-01 13:00:23 +01:00
|
|
|
started := chore.limiter.Go(ctx, func() {
|
2022-08-02 12:06:52 +01:00
|
|
|
defer chore.exitingMap.Delete(satellite.SatelliteID)
|
|
|
|
if err := worker.Run(ctx); err != nil {
|
2022-08-01 13:00:23 +01:00
|
|
|
chore.log.Error("worker failed", zap.Error(err))
|
|
|
|
}
|
|
|
|
})
|
|
|
|
if !started {
|
2022-08-02 12:06:52 +01:00
|
|
|
chore.exitingMap.Delete(satellite.SatelliteID)
|
2022-08-01 13:00:23 +01:00
|
|
|
return ctx.Err()
|
|
|
|
}
|
|
|
|
}
|
2020-08-23 16:10:14 +01:00
|
|
|
|
2022-08-01 13:00:23 +01:00
|
|
|
return nil
|
2019-10-15 16:29:47 +01:00
|
|
|
}
|
|
|
|
|
2022-08-01 13:00:23 +01:00
|
|
|
// TestWaitForNoWorkers waits for any pending worker to finish.
|
|
|
|
func (chore *Chore) TestWaitForNoWorkers(ctx context.Context) error {
|
|
|
|
for {
|
|
|
|
if !sync2.Sleep(ctx, 100*time.Millisecond) {
|
|
|
|
return ctx.Err()
|
|
|
|
}
|
|
|
|
|
|
|
|
found := false
|
|
|
|
chore.exitingMap.Range(func(key, value interface{}) bool {
|
|
|
|
found = true
|
|
|
|
return false
|
|
|
|
})
|
|
|
|
if !found {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
2020-08-23 16:10:14 +01:00
|
|
|
}
|
|
|
|
|
2019-10-15 16:29:47 +01:00
|
|
|
// Close closes chore.
|
|
|
|
func (chore *Chore) Close() error {
|
|
|
|
chore.Loop.Close()
|
|
|
|
return nil
|
|
|
|
}
|