storagenode/gracefulexit: omit finished exits from ListPendingExits

From the name of the function and from the way it is used (it is only called in one place, from "storj.io/storagenode/gracefulexit".(*Chore).Run()), it should not return graceful exits that have already completed. In particular, returning them causes a problem when a node has already completed a graceful exit from a satellite, and that satellite was afterwards decommissioned and is no longer in the "trusted" list. In that case an error message like "failed to get satellite address ... satellite \"X\" is untrusted" shows up in the node logs every single minute. See https://forum.storj.io/t/error-gracefulexit-service-failed-to-get-satellite-address/11372 This change causes ListPendingExits to list pending exits only, not all exits. Correspondingly, the check for whether an exit is already completed, in (*Chore).Run(), becomes unnecessary and is removed here. Change-Id: Ia3e9bb3e92be4a32ebcbda0321e3fe61d77deaa8
2021-01-29 14:00:38 -06:00 · 2021-01-29 14:00:38 -06:00 · c489a70e62
commit c489a70e62
parent 91bd4191dd
2 changed files with 3 additions and 3 deletions
--- a/storagenode/gracefulexit/chore.go
+++ b/storagenode/gracefulexit/chore.go
@@ -64,9 +64,6 @@ func (chore *Chore) Run(ctx context.Context) (err error) {
 		for _, satellite := range geSatellites {
 			mon.Meter("satellite_gracefulexit_request").Mark(1) //mon:locked
 			satellite := satellite
-			if satellite.FinishedAt != nil {
-				continue
-			}

 			worker := NewWorker(chore.log, chore.service, chore.transferService, chore.dialer, satellite.NodeURL, chore.config)
 			if _, ok := chore.exitingMap.LoadOrStore(satellite.SatelliteID, worker); ok {
--- a/storagenode/gracefulexit/service.go
+++ b/storagenode/gracefulexit/service.go
@@ -92,6 +92,9 @@ func (c *service) ListPendingExits(ctx context.Context) (_ []ExitingSatellite, e
 	}
 	exitingSatellites := make([]ExitingSatellite, 0, len(exitProgress))
 	for _, sat := range exitProgress {
+		if sat.FinishedAt != nil {
+			continue
+		}
 		nodeURL, err := c.trust.GetNodeURL(ctx, sat.SatelliteID)
 		if err != nil {
 			c.log.Error("failed to get satellite address", zap.Stringer("Satellite ID", sat.SatelliteID), zap.Error(err))