overlay cache: slight modification of node-is-online rules (#2490)

This commit is contained in:
JT Olio 2019-07-09 22:36:09 -04:00 committed by littleskunk
parent 1c5db71faf
commit a79c7d77f3
5 changed files with 33 additions and 17 deletions

View File

@ -4,6 +4,8 @@
package testplanet package testplanet
import ( import (
"time"
"go.uber.org/zap" "go.uber.org/zap"
"storj.io/storj/bootstrap" "storj.io/storj/bootstrap"
@ -37,3 +39,11 @@ var DisablePeerCAWhitelist = Reconfigure{
config.Server.UsePeerCAWhitelist = false config.Server.UsePeerCAWhitelist = false
}, },
} }
// ShortenOnlineWindow is a `Reconfigure` whose Satellite hook sets the
// NodeSelection OnlineWindow (config.Overlay.Node.OnlineWindow) to 1 second.
// The intent is that a connection failure quickly leads to the node being
// marked as offline.
var ShortenOnlineWindow = Reconfigure{
Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
config.Overlay.Node.OnlineWindow = 1 * time.Second
},
}

View File

@ -104,7 +104,7 @@ func (planet *Planet) newSatellites(count int) ([]*satellite.Peer, error) {
UptimeCount: 0, UptimeCount: 0,
AuditCount: 0, AuditCount: 0,
NewNodePercentage: 0, NewNodePercentage: 0,
OnlineWindow: time.Hour, OnlineWindow: 0,
DistinctIP: false, DistinctIP: false,
AuditReputationRepairWeight: 1, AuditReputationRepairWeight: 1,

View File

@ -7,6 +7,7 @@ import (
"testing" "testing"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"go.uber.org/zap"
"storj.io/storj/internal/memory" "storj.io/storj/internal/memory"
"storj.io/storj/internal/testcontext" "storj.io/storj/internal/testcontext"
@ -31,6 +32,11 @@ func TestDataRepair(t *testing.T) {
SatelliteCount: 1, SatelliteCount: 1,
StorageNodeCount: 12, StorageNodeCount: 12,
UplinkCount: 1, UplinkCount: 1,
Reconfigure: testplanet.Reconfigure{
Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
config.Overlay.Node.OnlineWindow = 0
},
},
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) { }, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
// first, upload some remote data // first, upload some remote data
ul := planet.Uplinks[0] ul := planet.Uplinks[0]

View File

@ -177,7 +177,7 @@ func (cache *Cache) Get(ctx context.Context, nodeID storj.NodeID) (_ *NodeDossie
// IsOnline checks if a node is 'online' based on the collected statistics. // IsOnline checks if a node is 'online' based on the collected statistics.
func (cache *Cache) IsOnline(node *NodeDossier) bool { func (cache *Cache) IsOnline(node *NodeDossier) bool {
return time.Now().Sub(node.Reputation.LastContactSuccess) < cache.preferences.OnlineWindow && return time.Now().Sub(node.Reputation.LastContactSuccess) < cache.preferences.OnlineWindow ||
node.Reputation.LastContactSuccess.After(node.Reputation.LastContactFailure) node.Reputation.LastContactSuccess.After(node.Reputation.LastContactFailure)
} }

View File

@ -10,9 +10,9 @@ import (
"time" "time"
"github.com/lib/pq" "github.com/lib/pq"
"github.com/mattn/go-sqlite3" sqlite3 "github.com/mattn/go-sqlite3"
"github.com/zeebo/errs" "github.com/zeebo/errs"
"gopkg.in/spacemonkeygo/monkit.v2" monkit "gopkg.in/spacemonkeygo/monkit.v2"
"storj.io/storj/internal/version" "storj.io/storj/internal/version"
"storj.io/storj/pkg/overlay" "storj.io/storj/pkg/overlay"
@ -44,8 +44,8 @@ func (cache *overlaycache) SelectStorageNodes(ctx context.Context, count int, cr
AND free_disk >= ? AND free_disk >= ?
AND total_audit_count >= ? AND total_audit_count >= ?
AND total_uptime_count >= ? AND total_uptime_count >= ?
AND last_contact_success > ? AND (last_contact_success > ?
AND last_contact_success > last_contact_failure` OR last_contact_success > last_contact_failure)`
args := append(make([]interface{}, 0, 13), args := append(make([]interface{}, 0, 13),
nodeType, criteria.FreeBandwidth, criteria.FreeDisk, criteria.AuditCount, nodeType, criteria.FreeBandwidth, criteria.FreeDisk, criteria.AuditCount,
criteria.UptimeCount, time.Now().Add(-criteria.OnlineWindow)) criteria.UptimeCount, time.Now().Add(-criteria.OnlineWindow))
@ -99,8 +99,8 @@ func (cache *overlaycache) SelectNewStorageNodes(ctx context.Context, count int,
AND free_bandwidth >= ? AND free_bandwidth >= ?
AND free_disk >= ? AND free_disk >= ?
AND (total_audit_count < ? OR total_uptime_count < ?) AND (total_audit_count < ? OR total_uptime_count < ?)
AND last_contact_success > ? AND (last_contact_success > ?
AND last_contact_success > last_contact_failure` OR last_contact_success > last_contact_failure)`
args := append(make([]interface{}, 0, 10), args := append(make([]interface{}, 0, 10),
nodeType, criteria.FreeBandwidth, criteria.FreeDisk, criteria.AuditCount, criteria.UptimeCount, time.Now().Add(-criteria.OnlineWindow)) nodeType, criteria.FreeBandwidth, criteria.FreeDisk, criteria.AuditCount, criteria.UptimeCount, time.Now().Add(-criteria.OnlineWindow))
@ -412,7 +412,7 @@ func (cache *overlaycache) KnownOffline(ctx context.Context, criteria *overlay.N
SELECT id FROM nodes SELECT id FROM nodes
WHERE id IN (?`+strings.Repeat(", ?", len(nodeIds)-1)+`) WHERE id IN (?`+strings.Repeat(", ?", len(nodeIds)-1)+`)
AND ( AND (
last_contact_success < last_contact_failure OR last_contact_success < ? last_contact_success < last_contact_failure AND last_contact_success < ?
) )
`), args...) `), args...)
@ -421,7 +421,7 @@ func (cache *overlaycache) KnownOffline(ctx context.Context, criteria *overlay.N
SELECT id FROM nodes SELECT id FROM nodes
WHERE id = any($1::bytea[]) WHERE id = any($1::bytea[])
AND ( AND (
last_contact_success < last_contact_failure OR last_contact_success < $2 last_contact_success < last_contact_failure AND last_contact_success < $2
) )
`, postgresNodeIDList(nodeIds), time.Now().Add(-criteria.OnlineWindow), `, postgresNodeIDList(nodeIds), time.Now().Add(-criteria.OnlineWindow),
) )
@ -469,7 +469,7 @@ func (cache *overlaycache) KnownUnreliableOrOffline(ctx context.Context, criteri
SELECT id FROM nodes SELECT id FROM nodes
WHERE id IN (?`+strings.Repeat(", ?", len(nodeIds)-1)+`) WHERE id IN (?`+strings.Repeat(", ?", len(nodeIds)-1)+`)
AND disqualified IS NULL AND disqualified IS NULL
AND last_contact_success > ? AND last_contact_success > last_contact_failure AND (last_contact_success > ? OR last_contact_success > last_contact_failure)
`), args...) `), args...)
case *pq.Driver: case *pq.Driver:
@ -477,7 +477,7 @@ func (cache *overlaycache) KnownUnreliableOrOffline(ctx context.Context, criteri
SELECT id FROM nodes SELECT id FROM nodes
WHERE id = any($1::bytea[]) WHERE id = any($1::bytea[])
AND disqualified IS NULL AND disqualified IS NULL
AND last_contact_success > $2 AND last_contact_success > last_contact_failure AND (last_contact_success > $2 OR last_contact_success > last_contact_failure)
`, postgresNodeIDList(nodeIds), time.Now().Add(-criteria.OnlineWindow), `, postgresNodeIDList(nodeIds), time.Now().Add(-criteria.OnlineWindow),
) )
default: default:
@ -514,7 +514,7 @@ func (cache *overlaycache) Reliable(ctx context.Context, criteria *overlay.NodeC
rows, err := cache.db.Query(cache.db.Rebind(` rows, err := cache.db.Query(cache.db.Rebind(`
SELECT id FROM nodes SELECT id FROM nodes
WHERE disqualified IS NULL WHERE disqualified IS NULL
AND last_contact_success > ? AND last_contact_success > last_contact_failure`), AND (last_contact_success > ? OR last_contact_success > last_contact_failure)`),
time.Now().Add(-criteria.OnlineWindow)) time.Now().Add(-criteria.OnlineWindow))
if err != nil { if err != nil {
return nil, err return nil, err