storj/satellite/downtime/detection_chore_test.go
Cameron Ayer 3e70a893dd storagenode/{piecestore, contact}: report capacity to satellites if below specific threshold
Curently, storage nodes only report their capacity to satellites
once per hour. If a node fills up, it will fail all uploads until
the next contact cycle begins. With these changes, at the end of an
upload we check whether the MinimumDiskSpace threshold has been
passed. If so, trigger the monitor chore to update the node's
capacity, then trigger the contact chore to report the new
capacity to the satellites

Change-Id: Ie6aadaade1e2c12c87e03f8ff9059a50121380a0
2020-02-18 15:42:48 -05:00

93 lines
3.3 KiB
Go

// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package downtime_test
import (
"testing"
"time"
"github.com/stretchr/testify/require"
"storj.io/common/testcontext"
"storj.io/storj/private/testplanet"
"storj.io/storj/satellite/overlay"
)
func TestDetectionChore(t *testing.T) {
testplanet.Run(t, testplanet.Config{
SatelliteCount: 1, StorageNodeCount: 1, UplinkCount: 0,
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
node := planet.StorageNodes[0]
nodeDossier := planet.StorageNodes[0].Local()
satellite := planet.Satellites[0]
require.NoError(t, node.Contact.Chore.Pause(ctx))
satellite.DowntimeTracking.DetectionChore.Loop.Pause()
// setup
info := overlay.NodeCheckInInfo{
NodeID: nodeDossier.Id,
IsUp: true,
Address: nodeDossier.Address,
Operator: &nodeDossier.Operator,
Version: &nodeDossier.Version,
}
sixtyOneMinutes := 61 * time.Minute
{ // test node ping back success
// check-in 1 hours, 1 minute ago for that node
oldCheckinTime := time.Now().UTC().Add(-sixtyOneMinutes)
err := satellite.DB.OverlayCache().UpdateCheckIn(ctx, info, oldCheckinTime, overlay.NodeSelectionConfig{})
require.NoError(t, err)
// get successful nodes that haven't checked in with the hour. should return 1
nodeLastContacts, err := satellite.DB.OverlayCache().GetSuccesfulNodesNotCheckedInSince(ctx, time.Hour)
require.NoError(t, err)
require.Len(t, nodeLastContacts, 1)
require.WithinDuration(t, oldCheckinTime, nodeLastContacts[0].LastContactSuccess, time.Second)
// run detection chore
satellite.DowntimeTracking.DetectionChore.Loop.TriggerWait()
// node should not be in "offline" list or "successful, not checked in" list
nodeLastContacts, err = satellite.DB.OverlayCache().GetSuccesfulNodesNotCheckedInSince(ctx, time.Hour)
require.NoError(t, err)
require.Len(t, nodeLastContacts, 0)
nodesOffline, err := satellite.DB.OverlayCache().GetOfflineNodesLimited(ctx, 10)
require.NoError(t, err)
require.Len(t, nodesOffline, 0)
}
{ // test node ping back failure
// check-in 1 hour, 1 minute ago for that node - again
oldCheckinTime := time.Now().UTC().Add(-sixtyOneMinutes)
err := satellite.DB.OverlayCache().UpdateCheckIn(ctx, info, oldCheckinTime, overlay.NodeSelectionConfig{})
require.NoError(t, err)
// close the node service so the ping back will fail
err = node.Server.Close()
require.NoError(t, err)
// get successful nodes that haven't checked in with the hour. should return 1 - again
nodeLastContacts, err := satellite.DB.OverlayCache().GetSuccesfulNodesNotCheckedInSince(ctx, time.Hour)
require.NoError(t, err)
require.Len(t, nodeLastContacts, 1)
require.WithinDuration(t, oldCheckinTime, nodeLastContacts[0].LastContactSuccess, time.Second)
// run detection chore - again
satellite.DowntimeTracking.DetectionChore.Loop.TriggerWait()
// node should be in "offline" list but not in "successful, not checked in" list
nodeLastContacts, err = satellite.DB.OverlayCache().GetSuccesfulNodesNotCheckedInSince(ctx, time.Hour)
require.NoError(t, err)
require.Len(t, nodeLastContacts, 0)
nodesOffline, err := satellite.DB.OverlayCache().GetOfflineNodesLimited(ctx, 10)
require.NoError(t, err)
require.Len(t, nodesOffline, 1)
}
})
}