From 985ccbe721b178a02475e1233024066bd13ec000 Mon Sep 17 00:00:00 2001 From: paul cannon Date: Wed, 30 Mar 2022 16:19:00 -0500 Subject: [PATCH] satellite/repair: in dns redial, don't retry if CloseError To save load on DNS servers, the repair code first tries to dial the last known good IP and port for a node, and then falls back to a DNS lookup only if we fail to connect to the last known good IP and port. However, it looks like we are seeing errors during the client stream Close() call (probably due to quic-go code), and those are classified the same as errors encountered during Dial. The repairer code sees this error, assumes that we failed to contact the node, and retries, but since we did actually succeed in connecting the first time around, this results in submitting the same order limit (with the same serial number) to the storage node, which (rightfully) rejects it. So together with change I055c186d5fd4e79560f67763175bc3130b9bc7d2 in storj/uplink, this should avoid the double submission and avoid dinging nodes' suspension scores unfairly. See https://github.com/storj/storj/issues/4687. Also, this moves the testsuite directory check up above check-monkit in the Jenkins Lint task, so that a non-tidy testsuite/go.mod can be recognized and handled before everything breaks weirdly and seemingly randomly later on. Change-Id: Icb2b05aaff921d0af6aba10e450ac7e0a7bb2655 --- Jenkinsfile.public | 6 ++++-- go.mod | 2 +- go.sum | 4 ++-- satellite/repair/repairer/ec.go | 2 +- testsuite/go.mod | 2 +- testsuite/go.sum | 4 ++-- 6 files changed, 11 insertions(+), 9 deletions(-) diff --git a/Jenkinsfile.public b/Jenkinsfile.public index 9c6a00340..dbaefa157 100644 --- a/Jenkinsfile.public +++ b/Jenkinsfile.public @@ -134,6 +134,10 @@ pipeline { parallel { stage('Lint') { steps { + sh 'check-mod-tidy -mod .build/go.mod.orig' + dir("testsuite") { + sh 'check-mod-tidy -mod ../.build/testsuite.go.mod.orig' + } sh 'check-copyright' sh 'check-imports -race ./...' 
sh 'check-peer-constraints -race' @@ -142,7 +146,6 @@ pipeline { sh 'check-errs ./...' sh 'staticcheck ./...' sh 'golangci-lint --config /go/ci/.golangci.yml -j=2 run' - sh 'check-mod-tidy -mod .build/go.mod.orig' sh 'make check-monitoring' sh 'make test-wasm-size' @@ -155,7 +158,6 @@ pipeline { sh 'check-errs ./...' sh 'staticcheck ./...' sh 'golangci-lint --config /go/ci/.golangci.yml -j=2 run' - sh 'check-mod-tidy -mod ../.build/testsuite.go.mod.orig' } dir("satellite/admin/ui") { diff --git a/go.mod b/go.mod index 7222ceafc..f1dc8314d 100644 --- a/go.mod +++ b/go.mod @@ -55,7 +55,7 @@ require ( storj.io/drpc v0.0.30 storj.io/monkit-jaeger v0.0.0-20220131130547-dc4cb5a0d97a storj.io/private v0.0.0-20220323114243-08c4b5e752f4 - storj.io/uplink v1.8.2-0.20220329143354-9065e58fc5df + storj.io/uplink v1.8.2-0.20220401124330-4a78b8e6431f ) require ( diff --git a/go.sum b/go.sum index 3d95781d9..a6e5ca3e4 100644 --- a/go.sum +++ b/go.sum @@ -929,5 +929,5 @@ storj.io/monkit-jaeger v0.0.0-20220131130547-dc4cb5a0d97a h1:qads+aZlFKm5gUxobfF storj.io/monkit-jaeger v0.0.0-20220131130547-dc4cb5a0d97a/go.mod h1:DGEycSjvzE0JqcD3+6IjwPEK6x30oOus6AApXzl7t0s= storj.io/private v0.0.0-20220323114243-08c4b5e752f4 h1:szxLYr9Rdmx19unt47kafB3816JFI5esPlrzdMyZhvk= storj.io/private v0.0.0-20220323114243-08c4b5e752f4/go.mod h1:fZ7FSXv/adIc79sF/5qm7zn0PI5+PWa5p+dbqrZQARM= -storj.io/uplink v1.8.2-0.20220329143354-9065e58fc5df h1:MhKJYHYz5nyF5Y7ed0kyhyDCkIcqeAQq6z29YVVHHpo= -storj.io/uplink v1.8.2-0.20220329143354-9065e58fc5df/go.mod h1:wGaBfQPOAu55YwGOoe5D5jb0k+hyS/S1SPUxHCQpLE0= +storj.io/uplink v1.8.2-0.20220401124330-4a78b8e6431f h1:1LpQus+WTmF0/Gno1xTEPc78ABy1gT66AXjCRUwJm/g= +storj.io/uplink v1.8.2-0.20220401124330-4a78b8e6431f/go.mod h1:wGaBfQPOAu55YwGOoe5D5jb0k+hyS/S1SPUxHCQpLE0= diff --git a/satellite/repair/repairer/ec.go b/satellite/repair/repairer/ec.go index 0c227ad4f..e39da595e 100644 --- a/satellite/repair/repairer/ec.go +++ b/satellite/repair/repairer/ec.go @@ -134,7 +134,7 
@@ func (ec *ECRepairer) Get(ctx context.Context, limits []*pb.AddressedOrderLimit, pieceReadCloser, _, _, err := ec.downloadAndVerifyPiece(ctx, limit, address, privateKey, "", pieceSize) // if piecestore dial with last ip:port failed try again with node address - if triedLastIPPort && piecestore.Error.Has(err) { + if triedLastIPPort && piecestore.Error.Has(err) && !piecestore.CloseError.Has(err) { if pieceReadCloser != nil { _ = pieceReadCloser.Close() } diff --git a/testsuite/go.mod b/testsuite/go.mod index 5ca903daf..c0b3bdd47 100644 --- a/testsuite/go.mod +++ b/testsuite/go.mod @@ -220,5 +220,5 @@ require ( storj.io/gateway v1.4.1 // indirect storj.io/minio v0.0.0-20211007171754-df6c27823c8a // indirect storj.io/monkit-jaeger v0.0.0-20220131130547-dc4cb5a0d97a // indirect - storj.io/uplink v1.8.2-0.20220329143354-9065e58fc5df // indirect + storj.io/uplink v1.8.2-0.20220401124330-4a78b8e6431f // indirect ) diff --git a/testsuite/go.sum b/testsuite/go.sum index 8cafc008f..4a11464a2 100644 --- a/testsuite/go.sum +++ b/testsuite/go.sum @@ -1477,5 +1477,5 @@ storj.io/private v0.0.0-20220323114243-08c4b5e752f4 h1:szxLYr9Rdmx19unt47kafB381 storj.io/private v0.0.0-20220323114243-08c4b5e752f4/go.mod h1:fZ7FSXv/adIc79sF/5qm7zn0PI5+PWa5p+dbqrZQARM= storj.io/uplink v1.7.0/go.mod h1:zqj/LFDxa6RMaSRSHOmukg3mMgesOry0iHSjNldDMGo= storj.io/uplink v1.7.1-0.20211103104100-a785482780d8/go.mod h1:pKqsMpNMIAz//2TXzUGOR6tpu3iyabvXV4VWINj4jaY= -storj.io/uplink v1.8.2-0.20220329143354-9065e58fc5df h1:MhKJYHYz5nyF5Y7ed0kyhyDCkIcqeAQq6z29YVVHHpo= -storj.io/uplink v1.8.2-0.20220329143354-9065e58fc5df/go.mod h1:wGaBfQPOAu55YwGOoe5D5jb0k+hyS/S1SPUxHCQpLE0= +storj.io/uplink v1.8.2-0.20220401124330-4a78b8e6431f h1:1LpQus+WTmF0/Gno1xTEPc78ABy1gT66AXjCRUwJm/g= +storj.io/uplink v1.8.2-0.20220401124330-4a78b8e6431f/go.mod h1:wGaBfQPOAu55YwGOoe5D5jb0k+hyS/S1SPUxHCQpLE0=