satellite/overlay: add config flag for node selection free disk requirement
Currently storage nodes report their free disk space once per hour. If a node becomes full, it has to wait until the next contact cycle begins to report that it is out of space, receiving and failing upload requests all the while. By raising the minimum disk space required for node selection, we give storage nodes more time to report their free space before they completely fill up. This change goes hand in hand with another change we want to implement: triggering a capacity report on the storage node immediately upon falling below the threshold.

Change-Id: I12f778286c6c3f582438b0e2949765ac43325e27
parent dba647199a
commit b22bf16b35
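The mechanics of the fix, in brief: FindStorageNodesWithPreferences stops copying the caller-supplied FreeDisk into the selection criteria and uses the configured minimum instead. A minimal sketch with simplified stand-in types (NodeCriteria is reduced to one field here; the real struct in satellite/overlay carries several more):

```go
package main

import "fmt"

// NodeCriteria is reduced to the one field relevant to this change.
type NodeCriteria struct {
	FreeDisk int64 // bytes a node must have free to be selected
}

// criteriaFor illustrates the swap: the per-request freeDisk value is
// no longer consulted; the configured minimum wins.
func criteriaFor(requestFreeDisk, configuredMinimum int64) NodeCriteria {
	_ = requestFreeDisk // before this change: NodeCriteria{FreeDisk: requestFreeDisk}
	return NodeCriteria{FreeDisk: configuredMinimum}
}

func main() {
	// A 2 MB upload no longer selects nodes with only ~2 MB free;
	// candidates must clear the configured 100 MB floor.
	fmt.Printf("%+v\n", criteriaFor(2<<20, 100<<20))
}
```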
@@ -289,6 +289,7 @@ func (planet *Planet) newSatellites(count int) ([]*SatelliteSystem, error) {
 				NewNodePercentage: 0,
 				OnlineWindow:      time.Minute,
 				DistinctIP:        false,
+				MinimumDiskSpace:  100 * memory.MB,
 
 				AuditReputationRepairWeight: 1,
 				AuditReputationUplinkWeight: 1,
@@ -176,7 +176,6 @@ func TestDisqualifiedNodesGetNoUpload(t *testing.T) {
 		MinimumRequiredNodes: 4,
 		RequestedCount:       0,
 		FreeBandwidth:        0,
-		FreeDisk:             0,
 		ExcludedNodes:        nil,
 		MinimumVersion:       "", // semver or empty
 	}
@@ -396,7 +396,6 @@ func (endpoint *Endpoint) processIncomplete(ctx context.Context, stream processS
 	request := &overlay.FindStorageNodesRequest{
 		RequestedCount: 1,
 		FreeBandwidth:  pieceSize,
-		FreeDisk:       pieceSize,
 		ExcludedNodes:  excludedNodeIDs,
 	}
 
@@ -202,7 +202,6 @@ func (endpoint *Endpoint) CreateSegmentOld(ctx context.Context, req *pb.SegmentW
 	request := overlay.FindStorageNodesRequest{
 		RequestedCount: int(req.Redundancy.Total),
 		FreeBandwidth:  maxPieceSize,
-		FreeDisk:       maxPieceSize,
 	}
 	nodes, err := endpoint.overlay.FindStorageNodes(ctx, request)
 	if err != nil {
@@ -1484,7 +1483,6 @@ func (endpoint *Endpoint) BeginSegment(ctx context.Context, req *pb.SegmentBegin
 	request := overlay.FindStorageNodesRequest{
 		RequestedCount: redundancy.TotalCount(),
 		FreeBandwidth:  maxPieceSize,
-		FreeDisk:       maxPieceSize,
 	}
 	nodes, err := endpoint.overlay.FindStorageNodes(ctx, request)
 	if err != nil {
@@ -8,6 +8,8 @@ import (
 
 	"github.com/spacemonkeygo/monkit/v3"
 	"github.com/zeebo/errs"
+
+	"storj.io/common/memory"
 )
 
 var (
@@ -31,6 +33,7 @@ type NodeSelectionConfig struct {
 	MinimumVersion string        `help:"the minimum node software version for node selection queries" default:""`
 	OnlineWindow   time.Duration `help:"the amount of time without seeing a node before its considered offline" default:"4h"`
 	DistinctIP     bool          `help:"require distinct IPs when choosing nodes for upload" releaseDefault:"true" devDefault:"false"`
+	MinimumDiskSpace memory.Size `help:"how much disk space a node at minimum must have to be selected for upload" default:"100MB"`
 
 	AuditReputationRepairWeight float64 `help:"weight to apply to audit reputation for total repair reputation calculation" default:"1.0"`
 	AuditReputationUplinkWeight float64 `help:"weight to apply to audit reputation for total uplink reputation calculation" default:"1.0"`
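The new field uses memory.Size from storj.io/common/memory, so the flag parses human-readable sizes and converts to bytes via Int64(). A small sketch of the comparison the selection query ends up making (hasEnoughDisk is a hypothetical helper for illustration, not part of this change):

```go
package main

import (
	"fmt"

	"storj.io/common/memory"
)

// hasEnoughDisk illustrates the filter: a node qualifies for upload
// selection only if its reported free disk meets the configured minimum.
func hasEnoughDisk(freeDisk int64, minimum memory.Size) bool {
	return freeDisk >= minimum.Int64()
}

func main() {
	minimum := 100 * memory.MB // the flag's default
	fmt.Println(hasEnoughDisk(9*memory.MB.Int64(), minimum))  // false
	fmt.Println(hasEnoughDisk(11*memory.MB.Int64(), minimum)) // true
}
```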
@@ -4,6 +4,9 @@
 package overlay_test
 
 import (
+	"crypto/tls"
+	"crypto/x509"
+	"net"
 	"runtime"
 	"strings"
 	"testing"
@@ -14,6 +17,9 @@ import (
 	"github.com/zeebo/errs"
 	"go.uber.org/zap"
 
+	"storj.io/common/memory"
+	"storj.io/common/pb"
+	"storj.io/common/rpc/rpcpeer"
 	"storj.io/common/storj"
 	"storj.io/common/testcontext"
 	"storj.io/storj/private/testplanet"
@@ -21,6 +27,71 @@ import (
 	"storj.io/storj/satellite/overlay"
 )
 
+func TestMinimumDiskSpace(t *testing.T) {
+	testplanet.Run(t, testplanet.Config{
+		SatelliteCount: 1, StorageNodeCount: 2, UplinkCount: 0,
+		Reconfigure: testplanet.Reconfigure{
+			Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
+				config.Overlay.Node.MinimumDiskSpace = 10 * memory.MB
+			},
+		},
+	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
+		node0 := planet.StorageNodes[0]
+		node0.Contact.Chore.Pause(ctx)
+		nodeDossier := node0.Local()
+		ident := node0.Identity
+		peer := rpcpeer.Peer{
+			Addr: &net.TCPAddr{
+				IP:   net.ParseIP(nodeDossier.Address.GetAddress()),
+				Port: 5,
+			},
+			State: tls.ConnectionState{
+				PeerCertificates: []*x509.Certificate{ident.Leaf, ident.CA},
+			},
+		}
+		peerCtx := rpcpeer.NewContext(ctx, &peer)
+
+		// report disk space less than minimum
+		_, err := planet.Satellites[0].Contact.Endpoint.CheckIn(peerCtx, &pb.CheckInRequest{
+			Address: nodeDossier.Address.GetAddress(),
+			Version: &nodeDossier.Version,
+			Capacity: &pb.NodeCapacity{
+				FreeBandwidth: 100000,
+				FreeDisk:      9 * memory.MB.Int64(),
+			},
+			Operator: &nodeDossier.Operator,
+		})
+		require.NoError(t, err)
+
+		// request 2 nodes, expect failure from not enough nodes
+		_, err = planet.Satellites[0].Overlay.Service.FindStorageNodes(ctx, overlay.FindStorageNodesRequest{
+			MinimumRequiredNodes: 2,
+			RequestedCount:       2,
+		})
+		require.Error(t, err)
+		require.True(t, overlay.ErrNotEnoughNodes.Has(err))
+
+		// report disk space greater than minimum
+		_, err = planet.Satellites[0].Contact.Endpoint.CheckIn(peerCtx, &pb.CheckInRequest{
+			Address: nodeDossier.Address.GetAddress(),
+			Version: &nodeDossier.Version,
+			Capacity: &pb.NodeCapacity{
+				FreeBandwidth: 100000,
+				FreeDisk:      11 * memory.MB.Int64(),
+			},
+			Operator: &nodeDossier.Operator,
+		})
+		require.NoError(t, err)
+
+		// request 2 nodes, expect success
+		_, err = planet.Satellites[0].Overlay.Service.FindStorageNodes(ctx, overlay.FindStorageNodesRequest{
+			MinimumRequiredNodes: 2,
+			RequestedCount:       2,
+		})
+		require.NoError(t, err)
+	})
+}
+
 func TestOffline(t *testing.T) {
 	testplanet.Run(t, testplanet.Config{
 		SatelliteCount: 1, StorageNodeCount: 4, UplinkCount: 1,
@@ -181,7 +252,6 @@ func testNodeSelection(t *testing.T, ctx *testcontext.Context, planet *testplane
 
 			response, err := service.FindStorageNodesWithPreferences(ctx, overlay.FindStorageNodesRequest{
 				FreeBandwidth:  0,
-				FreeDisk:       0,
 				RequestedCount: tt.RequestCount,
 				ExcludedNodes:  excludedNodes,
 			}, &tt.Preferences)
@@ -279,7 +349,6 @@ func TestNodeSelectionGracefulExit(t *testing.T) {
 
 			response, err := satellite.Overlay.Service.FindStorageNodesWithPreferences(ctx, overlay.FindStorageNodesRequest{
 				FreeBandwidth:  0,
-				FreeDisk:       0,
 				RequestedCount: tt.RequestCount,
 			}, &tt.Preferences)
 
@@ -488,7 +557,6 @@ func testDistinctIPs(t *testing.T, ctx *testcontext.Context, planet *testplanet.
 	for _, tt := range tests {
 		response, err := service.FindStorageNodesWithPreferences(ctx, overlay.FindStorageNodesRequest{
 			FreeBandwidth:  0,
-			FreeDisk:       0,
 			RequestedCount: tt.requestCount,
 		}, &tt.preferences)
 		if tt.shouldFailWith != nil {
@@ -112,7 +112,6 @@ type FindStorageNodesRequest struct {
 	MinimumRequiredNodes int
 	RequestedCount       int
 	FreeBandwidth        int64
-	FreeDisk             int64
 	ExcludedNodes        []storj.NodeID
 	MinimumVersion       string // semver or empty
 }
@@ -288,7 +287,7 @@ func (service *Service) FindStorageNodesWithPreferences(ctx context.Context, req
 	if newNodeCount > 0 {
 		newNodes, err = service.db.SelectNewStorageNodes(ctx, newNodeCount, &NodeCriteria{
 			FreeBandwidth:  req.FreeBandwidth,
-			FreeDisk:       req.FreeDisk,
+			FreeDisk:       preferences.MinimumDiskSpace.Int64(),
 			AuditCount:     preferences.AuditCount,
 			ExcludedNodes:  excludedNodes,
 			MinimumVersion: preferences.MinimumVersion,
@@ -311,7 +310,7 @@ func (service *Service) FindStorageNodesWithPreferences(ctx context.Context, req
 
 	criteria := NodeCriteria{
 		FreeBandwidth:  req.FreeBandwidth,
-		FreeDisk:       req.FreeDisk,
+		FreeDisk:       preferences.MinimumDiskSpace.Int64(),
 		AuditCount:     preferences.AuditCount,
 		UptimeCount:    preferences.UptimeCount,
 		ExcludedNodes:  excludedNodes,
@@ -172,7 +172,6 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, path storj.Path) (s
 	request := overlay.FindStorageNodesRequest{
 		RequestedCount: requestCount,
 		FreeBandwidth:  pieceSize,
-		FreeDisk:       pieceSize,
 		ExcludedNodes:  excludeNodeIDs,
 	}
 	newNodes, err := repairer.overlay.FindStorageNodes(ctx, request)
scripts/testdata/satellite-config.yaml.lock (vendored, 3 changes)
@@ -358,6 +358,9 @@ identity.key-path: /root/.local/share/storj/identity/satellite/identity.key
 # require distinct IPs when choosing nodes for upload
 # overlay.node.distinct-ip: true
 
+# how much disk space a node at minimum must have to be selected for upload
+# overlay.node.minimum-disk-space: 100.0 MB
+
 # the minimum node software version for node selection queries
 # overlay.node.minimum-version: ""
 
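Operators who want a stricter floor than the 100 MB default should be able to override it like any other satellite setting, e.g. `overlay.node.minimum-disk-space: 1.0 GB` in the config file or the matching command-line flag (the 1.0 GB value here is only an illustration, not a recommendation from this change).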