satellite/overlay: add config flag for node selection free disk requirement

Currently SNs report their free disk space once per hour. If a node
becomes full, it has to wait until the next contact cycle begins to
report, all the while receiving and failing upload requests. By increasing
the minimum required disk space, we can give the storage nodes more time
to report their space before they completely fill up. This change goes
hand-in-hand with another change we want to implement: trigger capacity
report on SN immediately upon falling below threshold.

Change-Id: I12f778286c6c3f582438b0e2949765ac43325e27
This commit is contained in:
Cameron Ayer 2020-02-10 15:59:29 -05:00 committed by Cameron
parent dba647199a
commit b22bf16b35
9 changed files with 80 additions and 11 deletions

View File

@ -289,6 +289,7 @@ func (planet *Planet) newSatellites(count int) ([]*SatelliteSystem, error) {
NewNodePercentage: 0,
OnlineWindow: time.Minute,
DistinctIP: false,
MinimumDiskSpace: 100 * memory.MB,
AuditReputationRepairWeight: 1,
AuditReputationUplinkWeight: 1,

View File

@ -176,7 +176,6 @@ func TestDisqualifiedNodesGetNoUpload(t *testing.T) {
MinimumRequiredNodes: 4,
RequestedCount: 0,
FreeBandwidth: 0,
FreeDisk: 0,
ExcludedNodes: nil,
MinimumVersion: "", // semver or empty
}

View File

@ -396,7 +396,6 @@ func (endpoint *Endpoint) processIncomplete(ctx context.Context, stream processS
request := &overlay.FindStorageNodesRequest{
RequestedCount: 1,
FreeBandwidth: pieceSize,
FreeDisk: pieceSize,
ExcludedNodes: excludedNodeIDs,
}

View File

@ -202,7 +202,6 @@ func (endpoint *Endpoint) CreateSegmentOld(ctx context.Context, req *pb.SegmentW
request := overlay.FindStorageNodesRequest{
RequestedCount: int(req.Redundancy.Total),
FreeBandwidth: maxPieceSize,
FreeDisk: maxPieceSize,
}
nodes, err := endpoint.overlay.FindStorageNodes(ctx, request)
if err != nil {
@ -1484,7 +1483,6 @@ func (endpoint *Endpoint) BeginSegment(ctx context.Context, req *pb.SegmentBegin
request := overlay.FindStorageNodesRequest{
RequestedCount: redundancy.TotalCount(),
FreeBandwidth: maxPieceSize,
FreeDisk: maxPieceSize,
}
nodes, err := endpoint.overlay.FindStorageNodes(ctx, request)
if err != nil {

View File

@ -8,6 +8,8 @@ import (
"github.com/spacemonkeygo/monkit/v3"
"github.com/zeebo/errs"
"storj.io/common/memory"
)
var (
@ -31,6 +33,7 @@ type NodeSelectionConfig struct {
MinimumVersion string `help:"the minimum node software version for node selection queries" default:""`
OnlineWindow time.Duration `help:"the amount of time without seeing a node before its considered offline" default:"4h"`
DistinctIP bool `help:"require distinct IPs when choosing nodes for upload" releaseDefault:"true" devDefault:"false"`
MinimumDiskSpace memory.Size `help:"how much disk space a node at minimum must have to be selected for upload" default:"100MB"`
AuditReputationRepairWeight float64 `help:"weight to apply to audit reputation for total repair reputation calculation" default:"1.0"`
AuditReputationUplinkWeight float64 `help:"weight to apply to audit reputation for total uplink reputation calculation" default:"1.0"`

View File

@ -4,6 +4,9 @@
package overlay_test
import (
"crypto/tls"
"crypto/x509"
"net"
"runtime"
"strings"
"testing"
@ -14,6 +17,9 @@ import (
"github.com/zeebo/errs"
"go.uber.org/zap"
"storj.io/common/memory"
"storj.io/common/pb"
"storj.io/common/rpc/rpcpeer"
"storj.io/common/storj"
"storj.io/common/testcontext"
"storj.io/storj/private/testplanet"
@ -21,6 +27,71 @@ import (
"storj.io/storj/satellite/overlay"
)
// TestMinimumDiskSpace checks node selection against the satellite's
// configured MinimumDiskSpace (10 MB here): after one of the two storage
// nodes checks in with 9 MB free, a request for two nodes must fail with
// ErrNotEnoughNodes; after the same node checks in with 11 MB free, the
// same request must succeed.
func TestMinimumDiskSpace(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1, StorageNodeCount: 2, UplinkCount: 0,
		Reconfigure: testplanet.Reconfigure{
			Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
				config.Overlay.Node.MinimumDiskSpace = 10 * memory.MB
			},
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		storageNode := planet.StorageNodes[0]
		// Presumably paused so the node's own periodic check-ins do not
		// overwrite the capacity values reported manually below — confirm.
		storageNode.Contact.Chore.Pause(ctx)

		dossier := storageNode.Local()
		identity := storageNode.Identity

		// Build a peer context carrying the node's certificates so the
		// check-in calls below are made on behalf of this storage node.
		// NOTE(review): if the dossier address is "host:port", net.ParseIP
		// yields nil here — verify that is acceptable for CheckIn.
		peer := rpcpeer.Peer{
			Addr: &net.TCPAddr{
				IP:   net.ParseIP(dossier.Address.GetAddress()),
				Port: 5,
			},
			State: tls.ConnectionState{
				PeerCertificates: []*x509.Certificate{identity.Leaf, identity.CA},
			},
		}
		peerCtx := rpcpeer.NewContext(ctx, &peer)

		sat := planet.Satellites[0]

		// reportFreeDisk checks the node in with the given free disk
		// capacity and requires the check-in itself to succeed.
		reportFreeDisk := func(freeDisk int64) {
			_, checkInErr := sat.Contact.Endpoint.CheckIn(peerCtx, &pb.CheckInRequest{
				Address: dossier.Address.GetAddress(),
				Version: &dossier.Version,
				Capacity: &pb.NodeCapacity{
					FreeBandwidth: 100000,
					FreeDisk:      freeDisk,
				},
				Operator: &dossier.Operator,
			})
			require.NoError(t, checkInErr)
		}

		// selectBothNodes asks the overlay for two nodes, which can only
		// succeed when both storage nodes in the planet are eligible.
		selectBothNodes := func() error {
			_, selectErr := sat.Overlay.Service.FindStorageNodes(ctx, overlay.FindStorageNodesRequest{
				MinimumRequiredNodes: 2,
				RequestedCount:       2,
			})
			return selectErr
		}

		// below the minimum: selection of two nodes must fail
		reportFreeDisk(9 * memory.MB.Int64())
		err := selectBothNodes()
		require.Error(t, err)
		require.True(t, overlay.ErrNotEnoughNodes.Has(err))

		// above the minimum: selection of two nodes must succeed
		reportFreeDisk(11 * memory.MB.Int64())
		require.NoError(t, selectBothNodes())
	})
}
func TestOffline(t *testing.T) {
testplanet.Run(t, testplanet.Config{
SatelliteCount: 1, StorageNodeCount: 4, UplinkCount: 1,
@ -181,7 +252,6 @@ func testNodeSelection(t *testing.T, ctx *testcontext.Context, planet *testplane
response, err := service.FindStorageNodesWithPreferences(ctx, overlay.FindStorageNodesRequest{
FreeBandwidth: 0,
FreeDisk: 0,
RequestedCount: tt.RequestCount,
ExcludedNodes: excludedNodes,
}, &tt.Preferences)
@ -279,7 +349,6 @@ func TestNodeSelectionGracefulExit(t *testing.T) {
response, err := satellite.Overlay.Service.FindStorageNodesWithPreferences(ctx, overlay.FindStorageNodesRequest{
FreeBandwidth: 0,
FreeDisk: 0,
RequestedCount: tt.RequestCount,
}, &tt.Preferences)
@ -488,7 +557,6 @@ func testDistinctIPs(t *testing.T, ctx *testcontext.Context, planet *testplanet.
for _, tt := range tests {
response, err := service.FindStorageNodesWithPreferences(ctx, overlay.FindStorageNodesRequest{
FreeBandwidth: 0,
FreeDisk: 0,
RequestedCount: tt.requestCount,
}, &tt.preferences)
if tt.shouldFailWith != nil {

View File

@ -112,7 +112,6 @@ type FindStorageNodesRequest struct {
MinimumRequiredNodes int
RequestedCount int
FreeBandwidth int64
FreeDisk int64
ExcludedNodes []storj.NodeID
MinimumVersion string // semver or empty
}
@ -288,7 +287,7 @@ func (service *Service) FindStorageNodesWithPreferences(ctx context.Context, req
if newNodeCount > 0 {
newNodes, err = service.db.SelectNewStorageNodes(ctx, newNodeCount, &NodeCriteria{
FreeBandwidth: req.FreeBandwidth,
FreeDisk: req.FreeDisk,
FreeDisk: preferences.MinimumDiskSpace.Int64(),
AuditCount: preferences.AuditCount,
ExcludedNodes: excludedNodes,
MinimumVersion: preferences.MinimumVersion,
@ -311,7 +310,7 @@ func (service *Service) FindStorageNodesWithPreferences(ctx context.Context, req
criteria := NodeCriteria{
FreeBandwidth: req.FreeBandwidth,
FreeDisk: req.FreeDisk,
FreeDisk: preferences.MinimumDiskSpace.Int64(),
AuditCount: preferences.AuditCount,
UptimeCount: preferences.UptimeCount,
ExcludedNodes: excludedNodes,

View File

@ -172,7 +172,6 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, path storj.Path) (s
request := overlay.FindStorageNodesRequest{
RequestedCount: requestCount,
FreeBandwidth: pieceSize,
FreeDisk: pieceSize,
ExcludedNodes: excludeNodeIDs,
}
newNodes, err := repairer.overlay.FindStorageNodes(ctx, request)

View File

@ -358,6 +358,9 @@ identity.key-path: /root/.local/share/storj/identity/satellite/identity.key
# require distinct IPs when choosing nodes for upload
# overlay.node.distinct-ip: true
# how much disk space a node at minimum must have to be selected for upload
# overlay.node.minimum-disk-space: 100.0 MB
# the minimum node software version for node selection queries
# overlay.node.minimum-version: ""