storagenode/gracefulexit: omit finished exits from ListPendingExits

From the name of the function and from the way it is used (only called
in one place, from "storj.io/storagenode/gracefulexit".(*Chore).Run()),
it should not return graceful exits that have already completed.

In particular, returning finished exits causes a problem when a node has
already completed a graceful exit from one satellite, after which the
satellite was decommissioned and is no longer in the "trusted" list. In
that case, an error message like the following shows up in the node logs
every single minute:
"failed to get satellite address ... satellite \"X\" is untrusted".

https://forum.storj.io/t/error-gracefulexit-service-failed-to-get-satellite-address/11372

This change causes ListPendingExits to list pending exits only, not all
exits.

Correspondingly, the check for whether an exit is already completed, in
(*Chore).Run(), becomes unnecessary and is here removed.

Change-Id: Ia3e9bb3e92be4a32ebcbda0321e3fe61d77deaa8
This commit is contained in:
paul cannon 2021-01-29 14:00:38 -06:00
parent 91bd4191dd
commit c489a70e62
2 changed files with 3 additions and 3 deletions

View File

@ -64,9 +64,6 @@ func (chore *Chore) Run(ctx context.Context) (err error) {
for _, satellite := range geSatellites {
mon.Meter("satellite_gracefulexit_request").Mark(1) //mon:locked
satellite := satellite
if satellite.FinishedAt != nil {
continue
}
worker := NewWorker(chore.log, chore.service, chore.transferService, chore.dialer, satellite.NodeURL, chore.config)
if _, ok := chore.exitingMap.LoadOrStore(satellite.SatelliteID, worker); ok {

View File

@ -92,6 +92,9 @@ func (c *service) ListPendingExits(ctx context.Context) (_ []ExitingSatellite, e
}
exitingSatellites := make([]ExitingSatellite, 0, len(exitProgress))
for _, sat := range exitProgress {
if sat.FinishedAt != nil {
continue
}
nodeURL, err := c.trust.GetNodeURL(ctx, sat.SatelliteID)
if err != nil {
c.log.Error("failed to get satellite address", zap.Stringer("Satellite ID", sat.SatelliteID), zap.Error(err))