storj/storagenode/contact/contact_test.go
Clement Sam 59b37db670 storagenode: overhaul QUIC check implementation
The current implementation blocks startup until one or none
of the trusted satellites is able to reach the node via QUIC.
This can delay startup. Also, the QUIC check is done only
once during startup, so if a misconfiguration happens later,
SNOs would have to restart the node to detect it.

In this change, we reuse the contact service, which pings the
satellites periodically for node check-in. During check-in, the
satellite tries pinging the node back via both TCP and QUIC and
reports both statuses. With this, we get a periodic update of
the QUIC status without restarting the node.
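
As a rough sketch of how the new status can be consumed on the node
side (only RequestPingMeQUIC and contact.NetworkStatusOk are taken
from the tests below; node, ctx, and log are assumed to come from the
surrounding context, and error handling is simplified):

	// Ask a trusted satellite to ping this node back over QUIC and
	// check the reported status; anything other than NetworkStatusOk
	// means the satellite could not reach the node over QUIC, while
	// TCP reachability is reported separately during check-in.
	quicStats, err := node.Contact.Service.RequestPingMeQUIC(ctx)
	if err != nil {
		return err // no trusted satellite could be asked to ping back
	}
	if quicStats.Status() != contact.NetworkStatusOk {
		log.Warn("node is not reachable via QUIC")
	}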

Also adds the time the node was last pinged via QUIC to the tooltip
on the QUIC status tab.

Resolves https://github.com/storj/storj/issues/4398

Change-Id: I18aa2a8e8d44e8187f8f2eb51f398fa6073882a4
2022-11-09 03:15:57 +00:00

// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.

package contact_test

import (
	"crypto/tls"
	"crypto/x509"
	"testing"
	"time"

	"github.com/stretchr/testify/require"
	"go.uber.org/zap"
	"golang.org/x/sync/errgroup"

	"storj.io/common/identity/testidentity"
	"storj.io/common/pb"
	"storj.io/common/rpc/rpcpeer"
	"storj.io/common/testcontext"
	"storj.io/storj/private/testplanet"
	"storj.io/storj/satellite"
	"storj.io/storj/storagenode/contact"
)
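
// TestStoragenodeContactEndpoint pings the node's contact endpoint twice
// and checks that the recorded last-ping time moves forward.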
func TestStoragenodeContactEndpoint(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1, StorageNodeCount: 1, UplinkCount: 0,
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		pingStats := planet.StorageNodes[0].Contact.PingStats

		conn, err := planet.Satellites[0].Dialer.DialNodeURL(ctx, planet.StorageNodes[0].NodeURL())
		require.NoError(t, err)
		defer ctx.Check(conn.Close)

		resp, err := pb.NewDRPCContactClient(conn).PingNode(ctx, &pb.ContactPingRequest{})
		require.NotNil(t, resp)
		require.NoError(t, err)

		firstPing := pingStats.WhenLastPinged()

		time.Sleep(time.Second) // HACKFIX: windows has large time granularity

		resp, err = pb.NewDRPCContactClient(conn).PingNode(ctx, &pb.ContactPingRequest{})
		require.NotNil(t, resp)
		require.NoError(t, err)

		secondPing := pingStats.WhenLastPinged()

		require.True(t, secondPing.After(firstPing))
	})
}
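
// TestNodeInfoUpdated checks that a capacity update made on the node
// reaches the satellite's overlay after the next contact chore cycle.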
func TestNodeInfoUpdated(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1, StorageNodeCount: 1, UplinkCount: 0,
		Reconfigure: testplanet.Reconfigure{
			Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
				config.Overlay.NodeCheckInWaitPeriod = 0
			},
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		satellite := planet.Satellites[0]
		node := planet.StorageNodes[0]

		node.Contact.Chore.Pause(ctx)

		oldInfo, err := satellite.Overlay.Service.Get(ctx, node.ID())
		require.NoError(t, err)

		oldCapacity := oldInfo.Capacity

		newCapacity := pb.NodeCapacity{
			FreeDisk: 0,
		}
		require.NotEqual(t, oldCapacity, newCapacity)

		node.Contact.Service.UpdateSelf(&newCapacity)

		node.Contact.Chore.TriggerWait(ctx)

		newInfo, err := satellite.Overlay.Service.Get(ctx, node.ID())
		require.NoError(t, err)

		firstUptime := oldInfo.Reputation.LastContactSuccess
		secondUptime := newInfo.Reputation.LastContactSuccess
		require.True(t, secondUptime.After(firstUptime))

		require.Equal(t, newCapacity, newInfo.Capacity)
	})
}
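
// TestServicePingSatellites checks that PingSatellites pushes the node's
// updated capacity to every trusted satellite.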
func TestServicePingSatellites(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 2, StorageNodeCount: 1, UplinkCount: 0,
		Reconfigure: testplanet.Reconfigure{
			Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
				config.Overlay.NodeCheckInWaitPeriod = 0
			},
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		node := planet.StorageNodes[0]
		node.Contact.Chore.Pause(ctx)

		newCapacity := pb.NodeCapacity{
			FreeDisk: 0,
		}
		for _, satellite := range planet.Satellites {
			info, err := satellite.Overlay.Service.Get(ctx, node.ID())
			require.NoError(t, err)
			require.NotEqual(t, newCapacity, info.Capacity)
		}

		node.Contact.Service.UpdateSelf(&newCapacity)
		err := node.Contact.Service.PingSatellites(ctx, 10*time.Second)
		require.NoError(t, err)

		for _, satellite := range planet.Satellites {
			info, err := satellite.Overlay.Service.Get(ctx, node.ID())
			require.NoError(t, err)
			require.Equal(t, newCapacity, info.Capacity)
		}
	})
}
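
// TestEndpointPingNode_UnTrust checks that an untrusted peer cannot
// successfully ping the node's contact endpoint.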
func TestEndpointPingNode_UnTrust(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1, StorageNodeCount: 1, UplinkCount: 0,
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		node := planet.StorageNodes[0]
		node.Contact.Chore.Pause(ctx)

		// make sure a trusted satellite is able to ping node
		info, err := planet.Satellites[0].Overlay.Service.Get(ctx, node.ID())
		require.NoError(t, err)
		require.Equal(t, node.ID(), info.Id)

		// an untrusted peer shouldn't be able to ping node successfully
		ident, err := testidentity.NewTestIdentity(ctx)
		require.NoError(t, err)

		state := tls.ConnectionState{
			PeerCertificates: []*x509.Certificate{ident.Leaf, ident.CA},
		}

		peerCtx := rpcpeer.NewContext(ctx, &rpcpeer.Peer{
			Addr:  node.Server.Addr(),
			State: state,
		})
		_, err = node.Contact.Endpoint.PingNode(peerCtx, &pb.ContactPingRequest{})
		require.Error(t, err)
	})
}
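
// TestLocalAndUpdateSelf checks that reading the local node info and
// updating it concurrently is safe.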
func TestLocalAndUpdateSelf(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1, StorageNodeCount: 1, UplinkCount: 0,
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		node := planet.StorageNodes[0]

		var group errgroup.Group
		group.Go(func() error {
			_ = node.Contact.Service.Local()
			return nil
		})
		node.Contact.Service.UpdateSelf(&pb.NodeCapacity{})
		_ = group.Wait()
	})
}
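
// TestServiceRequestPingMeQUIC checks that a trusted satellite reports a
// successful QUIC ping-back when asked via RequestPingMeQUIC.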
func TestServiceRequestPingMeQUIC(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 2, StorageNodeCount: 1, UplinkCount: 0,
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		node := planet.StorageNodes[0]
		node.Contact.Chore.Pause(ctx)

		quicStats, err := node.Contact.Service.RequestPingMeQUIC(ctx)
		require.NoError(t, err)
		require.Equal(t, contact.NetworkStatusOk, quicStats.Status())
	})
}