3e70a893dd
Currently, storage nodes only report their capacity to satellites once per hour. If a node fills up, it fails all uploads until the next contact cycle begins. With these changes, at the end of an upload we check whether the MinimumDiskSpace threshold has been passed. If so, we trigger the monitor chore to update the node's capacity, then trigger the contact chore to report the new capacity to the satellites.

Change-Id: Ie6aadaade1e2c12c87e03f8ff9059a50121380a0
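As a rough illustration of the flow described above, the sketch below shows an end-of-upload check that triggers the capacity monitor and contact chores once free space drops below a threshold. The Chore, Endpoint, and finishUpload names are assumptions made for the example and are not the actual storj.io/storj storagenode API.

// A minimal sketch of the flow described in the commit message, assuming
// hypothetical Chore and Endpoint types; names are illustrative only and
// do not reflect the real storj.io/storj storagenode API.
package main

import (
	"context"
	"fmt"
)

// Chore models a background loop that can be triggered outside its normal cycle.
type Chore interface {
	Trigger(ctx context.Context)
}

// Endpoint is a simplified upload endpoint that tracks remaining disk space.
type Endpoint struct {
	availableSpace   int64 // bytes the node can still accept
	minimumDiskSpace int64 // threshold below which capacity should be re-reported
	monitor          Chore // recalculates the node's capacity
	contact          Chore // reports the new capacity to the satellites
}

// finishUpload runs at the end of an upload. Once remaining space drops below
// the MinimumDiskSpace threshold, it triggers the monitor chore to update the
// node's capacity and then the contact chore to report it, instead of waiting
// up to an hour for the next contact cycle.
func (e *Endpoint) finishUpload(ctx context.Context, pieceSize int64) {
	e.availableSpace -= pieceSize
	if e.availableSpace < e.minimumDiskSpace {
		e.monitor.Trigger(ctx)
		e.contact.Trigger(ctx)
	}
}

type loggingChore struct{ name string }

func (c loggingChore) Trigger(ctx context.Context) { fmt.Println("triggered:", c.name) }

func main() {
	e := &Endpoint{
		availableSpace:   200 << 20, // 200 MiB free
		minimumDiskSpace: 100 << 20, // re-report once free space falls below 100 MiB
		monitor:          loggingChore{name: "monitor"},
		contact:          loggingChore{name: "contact"},
	}
	// a 150 MiB upload pushes free space below the threshold, so both chores fire
	e.finishUpload(context.Background(), 150<<20)
}

Triggering the contact chore immediately after the monitor means the satellites learn about the new capacity right away, rather than continuing to send uploads the node must fail until the next hourly contact cycle.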
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.

package downtime_test

import (
	"testing"
	"time"

	"github.com/stretchr/testify/require"

	"storj.io/common/testcontext"
	"storj.io/storj/private/testplanet"
	"storj.io/storj/satellite/overlay"
)

func TestDetectionChore(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1, StorageNodeCount: 1, UplinkCount: 0,
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		node := planet.StorageNodes[0]
		nodeDossier := planet.StorageNodes[0].Local()
		satellite := planet.Satellites[0]

		require.NoError(t, node.Contact.Chore.Pause(ctx))
		satellite.DowntimeTracking.DetectionChore.Loop.Pause()

		// setup
		info := overlay.NodeCheckInInfo{
			NodeID:   nodeDossier.Id,
			IsUp:     true,
			Address:  nodeDossier.Address,
			Operator: &nodeDossier.Operator,
			Version:  &nodeDossier.Version,
		}

		sixtyOneMinutes := 61 * time.Minute

		{ // test node ping back success
			// check-in 1 hour, 1 minute ago for that node
			oldCheckinTime := time.Now().UTC().Add(-sixtyOneMinutes)
			err := satellite.DB.OverlayCache().UpdateCheckIn(ctx, info, oldCheckinTime, overlay.NodeSelectionConfig{})
			require.NoError(t, err)

			// get successful nodes that haven't checked in within the hour. should return 1
			nodeLastContacts, err := satellite.DB.OverlayCache().GetSuccesfulNodesNotCheckedInSince(ctx, time.Hour)
			require.NoError(t, err)
			require.Len(t, nodeLastContacts, 1)
			require.WithinDuration(t, oldCheckinTime, nodeLastContacts[0].LastContactSuccess, time.Second)

			// run detection chore
			satellite.DowntimeTracking.DetectionChore.Loop.TriggerWait()

			// node should not be in "offline" list or "successful, not checked in" list
			nodeLastContacts, err = satellite.DB.OverlayCache().GetSuccesfulNodesNotCheckedInSince(ctx, time.Hour)
			require.NoError(t, err)
			require.Len(t, nodeLastContacts, 0)

			nodesOffline, err := satellite.DB.OverlayCache().GetOfflineNodesLimited(ctx, 10)
			require.NoError(t, err)
			require.Len(t, nodesOffline, 0)
		}

		{ // test node ping back failure
			// check-in 1 hour, 1 minute ago for that node - again
			oldCheckinTime := time.Now().UTC().Add(-sixtyOneMinutes)
			err := satellite.DB.OverlayCache().UpdateCheckIn(ctx, info, oldCheckinTime, overlay.NodeSelectionConfig{})
			require.NoError(t, err)

			// close the node service so the ping back will fail
			err = node.Server.Close()
			require.NoError(t, err)

			// get successful nodes that haven't checked in within the hour. should return 1 - again
			nodeLastContacts, err := satellite.DB.OverlayCache().GetSuccesfulNodesNotCheckedInSince(ctx, time.Hour)
			require.NoError(t, err)
			require.Len(t, nodeLastContacts, 1)
			require.WithinDuration(t, oldCheckinTime, nodeLastContacts[0].LastContactSuccess, time.Second)

			// run detection chore - again
			satellite.DowntimeTracking.DetectionChore.Loop.TriggerWait()

			// node should be in "offline" list but not in "successful, not checked in" list
			nodeLastContacts, err = satellite.DB.OverlayCache().GetSuccesfulNodesNotCheckedInSince(ctx, time.Hour)
			require.NoError(t, err)
			require.Len(t, nodeLastContacts, 0)

			nodesOffline, err := satellite.DB.OverlayCache().GetOfflineNodesLimited(ctx, 10)
			require.NoError(t, err)
			require.Len(t, nodesOffline, 1)
		}
	})
}