satellite/gracefulexit: select new node filtered by Distinct IP (#3435)
This commit is contained in:
parent
aaead2a29e
commit
68a7790069
@ -21,7 +21,7 @@ type Reconfigure struct {
|
|||||||
|
|
||||||
NewStorageNodeDB func(index int, db storagenode.DB, log *zap.Logger) (storagenode.DB, error)
|
NewStorageNodeDB func(index int, db storagenode.DB, log *zap.Logger) (storagenode.DB, error)
|
||||||
StorageNode func(index int, config *storagenode.Config)
|
StorageNode func(index int, config *storagenode.Config)
|
||||||
NewIPCount int
|
UniqueIPCount int
|
||||||
}
|
}
|
||||||
|
|
||||||
// DisablePeerCAWhitelist returns a `Reconfigure` that sets `UsePeerCAWhitelist` for
|
// DisablePeerCAWhitelist returns a `Reconfigure` that sets `UsePeerCAWhitelist` for
|
||||||
|
@ -135,7 +135,7 @@ func (planet *Planet) newStorageNodes(count int, whitelistedSatellites storj.Nod
|
|||||||
planet.config.Reconfigure.StorageNode(i, &config)
|
planet.config.Reconfigure.StorageNode(i, &config)
|
||||||
}
|
}
|
||||||
|
|
||||||
newIPCount := planet.config.Reconfigure.NewIPCount
|
newIPCount := planet.config.Reconfigure.UniqueIPCount
|
||||||
if newIPCount > 0 {
|
if newIPCount > 0 {
|
||||||
if i >= count-newIPCount {
|
if i >= count-newIPCount {
|
||||||
config.Server.Address = fmt.Sprintf("127.0.%d.1:0", i+1)
|
config.Server.Address = fmt.Sprintf("127.0.%d.1:0", i+1)
|
||||||
|
@ -267,6 +267,7 @@ func (endpoint *Endpoint) doProcess(stream processStream) (err error) {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// maps pieceIDs to pendingTransfers to keep track of ongoing piece transfer requests
|
||||||
pending := newPendingMap()
|
pending := newPendingMap()
|
||||||
|
|
||||||
// these are used to synchronize the "incomplete transfer loop" with the main thread (storagenode receive loop)
|
// these are used to synchronize the "incomplete transfer loop" with the main thread (storagenode receive loop)
|
||||||
|
@ -5,16 +5,19 @@ package overlay_test
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
"github.com/zeebo/errs"
|
"github.com/zeebo/errs"
|
||||||
|
"go.uber.org/zap"
|
||||||
|
|
||||||
"storj.io/storj/internal/testcontext"
|
"storj.io/storj/internal/testcontext"
|
||||||
"storj.io/storj/internal/testplanet"
|
"storj.io/storj/internal/testplanet"
|
||||||
"storj.io/storj/pkg/storj"
|
"storj.io/storj/pkg/storj"
|
||||||
|
"storj.io/storj/satellite"
|
||||||
"storj.io/storj/satellite/overlay"
|
"storj.io/storj/satellite/overlay"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -301,6 +304,61 @@ func TestNodeSelectionGracefulExit(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestFindStorageNodesDistinctIPs(t *testing.T) {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
t.Skip("Test does not work with macOS")
|
||||||
|
}
|
||||||
|
testplanet.Run(t, testplanet.Config{
|
||||||
|
SatelliteCount: 1, StorageNodeCount: 5, UplinkCount: 1,
|
||||||
|
Reconfigure: testplanet.Reconfigure{
|
||||||
|
// will create 3 storage nodes with same IP; 2 will have unique
|
||||||
|
UniqueIPCount: 2,
|
||||||
|
Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
|
||||||
|
config.Overlay.Node.DistinctIP = true
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
||||||
|
satellite := planet.Satellites[0]
|
||||||
|
|
||||||
|
// select one of the nodes that shares an IP with others to exclude
|
||||||
|
var excludedNodes storj.NodeIDList
|
||||||
|
addrCounts := make(map[string]int)
|
||||||
|
var excludedNodeAddr string
|
||||||
|
for _, node := range planet.StorageNodes {
|
||||||
|
addrNoPort := strings.Split(node.Addr(), ":")[0]
|
||||||
|
if addrCounts[addrNoPort] > 0 && len(excludedNodes) == 0 {
|
||||||
|
excludedNodes = append(excludedNodes, node.ID())
|
||||||
|
break
|
||||||
|
}
|
||||||
|
addrCounts[addrNoPort]++
|
||||||
|
}
|
||||||
|
require.Len(t, excludedNodes, 1)
|
||||||
|
res, err := satellite.Overlay.Service.Get(ctx, excludedNodes[0])
|
||||||
|
require.NoError(t, err)
|
||||||
|
excludedNodeAddr = res.LastIp
|
||||||
|
|
||||||
|
req := overlay.FindStorageNodesRequest{
|
||||||
|
MinimumRequiredNodes: 2,
|
||||||
|
RequestedCount: 2,
|
||||||
|
ExcludedNodes: excludedNodes,
|
||||||
|
}
|
||||||
|
nodes, err := satellite.Overlay.Service.FindStorageNodes(ctx, req)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, nodes, 2)
|
||||||
|
require.NotEqual(t, nodes[0].LastIp, nodes[1].LastIp)
|
||||||
|
require.NotEqual(t, nodes[0].LastIp, excludedNodeAddr)
|
||||||
|
require.NotEqual(t, nodes[1].LastIp, excludedNodeAddr)
|
||||||
|
|
||||||
|
req = overlay.FindStorageNodesRequest{
|
||||||
|
MinimumRequiredNodes: 3,
|
||||||
|
RequestedCount: 3,
|
||||||
|
ExcludedNodes: excludedNodes,
|
||||||
|
}
|
||||||
|
_, err = satellite.Overlay.Service.FindStorageNodes(ctx, req)
|
||||||
|
require.Error(t, err)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func TestDistinctIPs(t *testing.T) {
|
func TestDistinctIPs(t *testing.T) {
|
||||||
if runtime.GOOS == "darwin" {
|
if runtime.GOOS == "darwin" {
|
||||||
t.Skip("Test does not work with macOS")
|
t.Skip("Test does not work with macOS")
|
||||||
@ -308,7 +366,7 @@ func TestDistinctIPs(t *testing.T) {
|
|||||||
testplanet.Run(t, testplanet.Config{
|
testplanet.Run(t, testplanet.Config{
|
||||||
SatelliteCount: 1, StorageNodeCount: 10, UplinkCount: 1,
|
SatelliteCount: 1, StorageNodeCount: 10, UplinkCount: 1,
|
||||||
Reconfigure: testplanet.Reconfigure{
|
Reconfigure: testplanet.Reconfigure{
|
||||||
NewIPCount: 3,
|
UniqueIPCount: 3,
|
||||||
},
|
},
|
||||||
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
||||||
satellite := planet.Satellites[0]
|
satellite := planet.Satellites[0]
|
||||||
@ -338,7 +396,7 @@ func TestDistinctIPsWithBatch(t *testing.T) {
|
|||||||
testplanet.Run(t, testplanet.Config{
|
testplanet.Run(t, testplanet.Config{
|
||||||
SatelliteCount: 1, StorageNodeCount: 10, UplinkCount: 1,
|
SatelliteCount: 1, StorageNodeCount: 10, UplinkCount: 1,
|
||||||
Reconfigure: testplanet.Reconfigure{
|
Reconfigure: testplanet.Reconfigure{
|
||||||
NewIPCount: 3,
|
UniqueIPCount: 3,
|
||||||
},
|
},
|
||||||
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
||||||
satellite := planet.Satellites[0]
|
satellite := planet.Satellites[0]
|
||||||
|
@ -81,6 +81,9 @@ type DB interface {
|
|||||||
GetGracefulExitIncompleteByTimeFrame(ctx context.Context, begin, end time.Time) (exitingNodes storj.NodeIDList, err error)
|
GetGracefulExitIncompleteByTimeFrame(ctx context.Context, begin, end time.Time) (exitingNodes storj.NodeIDList, err error)
|
||||||
// GetExitStatus returns a node's graceful exit status.
|
// GetExitStatus returns a node's graceful exit status.
|
||||||
GetExitStatus(ctx context.Context, nodeID storj.NodeID) (exitStatus *ExitStatus, err error)
|
GetExitStatus(ctx context.Context, nodeID storj.NodeID) (exitStatus *ExitStatus, err error)
|
||||||
|
|
||||||
|
// GetNodeIPs returns a list of IP addresses associated with given node IDs.
|
||||||
|
GetNodeIPs(ctx context.Context, nodeIDs []storj.NodeID) (nodeIPs []string, err error)
|
||||||
}
|
}
|
||||||
|
|
||||||
// NodeCheckInInfo contains all the info that will be updated when a node checkins
|
// NodeCheckInInfo contains all the info that will be updated when a node checkins
|
||||||
@ -255,6 +258,14 @@ func (service *Service) FindStorageNodesWithPreferences(ctx context.Context, req
|
|||||||
}
|
}
|
||||||
|
|
||||||
excludedNodes := req.ExcludedNodes
|
excludedNodes := req.ExcludedNodes
|
||||||
|
// get and exclude IPs associated with excluded nodes if distinctIP is enabled
|
||||||
|
var excludedIPs []string
|
||||||
|
if preferences.DistinctIP && len(excludedNodes) > 0 {
|
||||||
|
excludedIPs, err = service.db.GetNodeIPs(ctx, excludedNodes)
|
||||||
|
if err != nil {
|
||||||
|
return nil, Error.Wrap(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
newNodeCount := 0
|
newNodeCount := 0
|
||||||
if preferences.NewNodePercentage > 0 {
|
if preferences.NewNodePercentage > 0 {
|
||||||
@ -277,7 +288,6 @@ func (service *Service) FindStorageNodesWithPreferences(ctx context.Context, req
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var excludedIPs []string
|
|
||||||
// add selected new nodes and their IPs to the excluded lists for reputable node selection
|
// add selected new nodes and their IPs to the excluded lists for reputable node selection
|
||||||
for _, newNode := range newNodes {
|
for _, newNode := range newNodes {
|
||||||
excludedNodes = append(excludedNodes, newNode.Id)
|
excludedNodes = append(excludedNodes, newNode.Id)
|
||||||
|
@ -146,6 +146,32 @@ func (cache *overlaycache) SelectNewStorageNodes(ctx context.Context, count int,
|
|||||||
return nodes, nil
|
return nodes, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetNodeIPs returns a list of node IP addresses. Warning: these node IP addresses might be returned out of order.
|
||||||
|
func (cache *overlaycache) GetNodeIPs(ctx context.Context, nodeIDs []storj.NodeID) (nodeIPs []string, err error) {
|
||||||
|
defer mon.Task()(&ctx)(&err)
|
||||||
|
|
||||||
|
var rows *sql.Rows
|
||||||
|
rows, err = cache.db.Query(cache.db.Rebind(`
|
||||||
|
SELECT last_net FROM nodes
|
||||||
|
WHERE id = any($1::bytea[])
|
||||||
|
`), postgresNodeIDList(nodeIDs),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer func() { err = errs.Combine(err, rows.Close()) }()
|
||||||
|
|
||||||
|
for rows.Next() {
|
||||||
|
var ip string
|
||||||
|
err = rows.Scan(&ip)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
nodeIPs = append(nodeIPs, ip)
|
||||||
|
}
|
||||||
|
return nodeIPs, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (cache *overlaycache) queryNodes(ctx context.Context, excludedNodes []storj.NodeID, count int, safeQuery string, args ...interface{}) (_ []*pb.Node, err error) {
|
func (cache *overlaycache) queryNodes(ctx context.Context, excludedNodes []storj.NodeID, count int, safeQuery string, args ...interface{}) (_ []*pb.Node, err error) {
|
||||||
defer mon.Task()(&ctx)(&err)
|
defer mon.Task()(&ctx)(&err)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user