From b1bb665c78316958842aab90acead1a789364216 Mon Sep 17 00:00:00 2001 From: Ethan Date: Fri, 29 May 2020 10:45:46 -0400 Subject: [PATCH] satellite/metainfo: Handle "server is not accepting clients" error during CRDB node rejoins https://storjlabs.atlassian.net/browse/SM-1035 Change-Id: I27243b0d8fc3250916c86ceb915f973cbf80f656 --- private/dbutil/cockroachutil/driver.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/private/dbutil/cockroachutil/driver.go b/private/dbutil/cockroachutil/driver.go index 13c808a88..bed643dbc 100644 --- a/private/dbutil/cockroachutil/driver.go +++ b/private/dbutil/cockroachutil/driver.go @@ -90,6 +90,7 @@ func (c *cockroachConn) Close() error { func (c *cockroachConn) ExecContext(ctx context.Context, query string, args []driver.NamedValue) (driver.Result, error) { result, err := c.underlying.ExecContext(ctx, query, args) for err != nil && !c.isInTransaction() && NeedsRetry(err) { + mon.Event("needed_retry") result, err = c.underlying.ExecContext(ctx, query, args) } return result, err @@ -149,6 +150,7 @@ func (c *cockroachConn) QueryContext(ctx context.Context, query string, args []d if c.isInTransaction() { return nil, err } + mon.Event("needed_retry") continue } return nil, err @@ -162,6 +164,7 @@ func (c *cockroachConn) QueryContext(ctx context.Context, query string, args []d if c.isInTransaction() { return nil, err } + mon.Event("needed_retry") continue } return nil, err @@ -248,6 +251,7 @@ func (stmt *cockroachStmt) Exec(args []driver.Value) (driver.Result, error) { } result, err := stmt.underlyingStmt.ExecContext(context.Background(), namedArgs) for err != nil && !stmt.conn.isInTransaction() && NeedsRetry(err) { + mon.Event("needed_retry") result, err = stmt.underlyingStmt.ExecContext(context.Background(), namedArgs) } return result, err @@ -268,6 +272,7 @@ func (stmt *cockroachStmt) Query(args []driver.Value) (driver.Rows, error) { func (stmt *cockroachStmt) ExecContext(ctx context.Context, args 
[]driver.NamedValue) (driver.Result, error) { result, err := stmt.underlyingStmt.ExecContext(ctx, args) for err != nil && !stmt.conn.isInTransaction() && NeedsRetry(err) { + mon.Event("needed_retry") result, err = stmt.underlyingStmt.ExecContext(ctx, args) } return result, err @@ -283,6 +288,7 @@ func (stmt *cockroachStmt) QueryContext(ctx context.Context, args []driver.Named if stmt.conn.isInTransaction() { return nil, err } + mon.Event("needed_retry") continue } return nil, err @@ -296,6 +302,7 @@ func (stmt *cockroachStmt) QueryContext(ctx context.Context, args []driver.Named if stmt.conn.isInTransaction() { return nil, err } + mon.Event("needed_retry") continue } return nil, err @@ -317,7 +324,12 @@ func translateName(name string) string { // borrowed from code in crdb. func NeedsRetry(err error) bool { code := errCode(err) - return code == "40001" || code == "CR000" + + // 57P01 occurs when a CRDB node rejoins the cluster but is not ready to accept connections + // CRDB support recommended a retry at this point + // Support ticket: https://support.cockroachlabs.com/hc/en-us/requests/5510 + // TODO re-evaluate this if support provides a better solution + return code == "40001" || code == "CR000" || code == "57P01" } // borrowed from crdb