2019-10-07 21:38:05 +01:00
|
|
|
// Copyright (C) 2019 Storj Labs, Inc.
|
|
|
|
// See LICENSE for copying information.
|
|
|
|
|
|
|
|
package gracefulexit
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2019-10-24 17:24:42 +01:00
|
|
|
"database/sql"
|
2019-10-07 21:38:05 +01:00
|
|
|
"time"
|
|
|
|
|
2019-10-24 17:24:42 +01:00
|
|
|
"github.com/zeebo/errs"
|
2019-10-07 21:38:05 +01:00
|
|
|
"go.uber.org/zap"
|
|
|
|
|
2019-12-27 11:48:47 +00:00
|
|
|
"storj.io/common/storj"
|
|
|
|
"storj.io/common/sync2"
|
2019-10-07 21:38:05 +01:00
|
|
|
"storj.io/storj/satellite/metainfo"
|
|
|
|
"storj.io/storj/satellite/overlay"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Chore populates the graceful exit transfer queue.
|
|
|
|
//
|
|
|
|
// architecture: Chore
|
|
|
|
type Chore struct {
|
|
|
|
log *zap.Logger
|
|
|
|
Loop sync2.Cycle
|
|
|
|
db DB
|
|
|
|
config Config
|
|
|
|
overlay overlay.DB
|
|
|
|
metainfoLoop *metainfo.Loop
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewChore instantiates Chore.
|
2019-10-11 22:18:05 +01:00
|
|
|
func NewChore(log *zap.Logger, db DB, overlay overlay.DB, metaLoop *metainfo.Loop, config Config) *Chore {
|
2019-10-07 21:38:05 +01:00
|
|
|
return &Chore{
|
|
|
|
log: log,
|
|
|
|
Loop: *sync2.NewCycle(config.ChoreInterval),
|
|
|
|
db: db,
|
|
|
|
config: config,
|
|
|
|
overlay: overlay,
|
|
|
|
metainfoLoop: metaLoop,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Run starts the chore.
|
|
|
|
func (chore *Chore) Run(ctx context.Context) (err error) {
|
|
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
return chore.Loop.Run(ctx, func(ctx context.Context) (err error) {
|
|
|
|
defer mon.Task()(&ctx)(&err)
|
2019-10-30 08:21:27 +00:00
|
|
|
chore.log.Debug("checking pending exits")
|
2019-10-07 21:38:05 +01:00
|
|
|
|
2019-10-24 17:24:42 +01:00
|
|
|
exitingNodes, err := chore.overlay.GetExitingNodes(ctx)
|
2019-10-07 21:38:05 +01:00
|
|
|
if err != nil {
|
2019-10-24 17:24:42 +01:00
|
|
|
chore.log.Error("error retrieving nodes that have not finished exiting", zap.Error(err))
|
2019-10-07 21:38:05 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
nodeCount := len(exitingNodes)
|
2019-10-30 08:21:27 +00:00
|
|
|
chore.log.Debug("graceful exit", zap.Int("exitingNodes", nodeCount))
|
2019-10-07 21:38:05 +01:00
|
|
|
if nodeCount == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-10-24 17:24:42 +01:00
|
|
|
exitingNodesLoopIncomplete := make(storj.NodeIDList, 0, nodeCount)
|
|
|
|
for _, node := range exitingNodes {
|
|
|
|
if node.ExitLoopCompletedAt == nil {
|
|
|
|
exitingNodesLoopIncomplete = append(exitingNodesLoopIncomplete, node.NodeID)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
progress, err := chore.db.GetProgress(ctx, node.NodeID)
|
|
|
|
if err != nil && !errs.Is(err, sql.ErrNoRows) {
|
|
|
|
chore.log.Error("error retrieving progress for node", zap.Stringer("Node ID", node.NodeID), zap.Error(err))
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
lastActivityTime := *node.ExitLoopCompletedAt
|
|
|
|
if progress != nil {
|
|
|
|
lastActivityTime = progress.UpdatedAt
|
|
|
|
}
|
|
|
|
|
|
|
|
// check inactive timeframe
|
|
|
|
if lastActivityTime.Add(chore.config.MaxInactiveTimeFrame).Before(time.Now().UTC()) {
|
|
|
|
exitStatusRequest := &overlay.ExitStatusRequest{
|
|
|
|
NodeID: node.NodeID,
|
|
|
|
ExitSuccess: false,
|
|
|
|
ExitFinishedAt: time.Now().UTC(),
|
|
|
|
}
|
2019-10-29 20:22:20 +00:00
|
|
|
mon.Meter("graceful_exit_fail_inactive").Mark(1)
|
2019-10-24 17:24:42 +01:00
|
|
|
_, err = chore.overlay.UpdateExitStatus(ctx, exitStatusRequest)
|
|
|
|
if err != nil {
|
|
|
|
chore.log.Error("error updating exit status", zap.Error(err))
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// remove all items from the transfer queue
|
|
|
|
err := chore.db.DeleteTransferQueueItems(ctx, node.NodeID)
|
|
|
|
if err != nil {
|
|
|
|
chore.log.Error("error deleting node from transfer queue", zap.Error(err))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Populate transfer queue for nodes that have not completed the exit loop yet
|
|
|
|
pathCollector := NewPathCollector(chore.db, exitingNodesLoopIncomplete, chore.log, chore.config.ChoreBatchSize)
|
2019-10-07 21:38:05 +01:00
|
|
|
err = chore.metainfoLoop.Join(ctx, pathCollector)
|
|
|
|
if err != nil {
|
|
|
|
chore.log.Error("error joining metainfo loop.", zap.Error(err))
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
err = pathCollector.Flush(ctx)
|
|
|
|
if err != nil {
|
|
|
|
chore.log.Error("error flushing collector buffer.", zap.Error(err))
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
now := time.Now().UTC()
|
2019-10-24 17:24:42 +01:00
|
|
|
for _, nodeID := range exitingNodesLoopIncomplete {
|
2019-10-07 21:38:05 +01:00
|
|
|
exitStatus := overlay.ExitStatusRequest{
|
|
|
|
NodeID: nodeID,
|
|
|
|
ExitLoopCompletedAt: now,
|
|
|
|
}
|
|
|
|
_, err = chore.overlay.UpdateExitStatus(ctx, &exitStatus)
|
|
|
|
if err != nil {
|
|
|
|
chore.log.Error("error updating exit status.", zap.Error(err))
|
|
|
|
}
|
2019-10-29 20:22:20 +00:00
|
|
|
|
|
|
|
bytesToTransfer := pathCollector.nodeIDStorage[nodeID]
|
|
|
|
mon.IntVal("graceful_exit_init_bytes_stored").Observe(bytesToTransfer)
|
2019-10-07 21:38:05 +01:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close closes chore.
|
|
|
|
func (chore *Chore) Close() error {
|
|
|
|
chore.Loop.Close()
|
|
|
|
return nil
|
|
|
|
}
|