storj/internal/testplanet/planet.go
Jennifer Li Johnson 724bb44723
Remove Kademlia dependencies from Satellite and Storagenode (#2966)
What:

cmd/inspector/main.go: removes kad commands
internal/testplanet/planet.go: Waits for contact chore to finish
satellite/contact/nodesservice.go: creates an empty nodes service implementation
satellite/contact/service.go: implements Local and FetchInfo methods & adds external address config value
satellite/discovery/service.go: replaces kad.FetchInfo with contact.FetchInfo in Refresh() & removes Discover()
satellite/peer.go: sets up contact service and endpoints
storagenode/console/service.go: replaces nodeID with contact.Local()
storagenode/contact/chore.go: replaces routing table with contact service
storagenode/contact/nodesservice.go: creates empty implementation for ping and request info nodes service & implements RequestInfo method
storagenode/contact/service.go: creates a service to return the local node and update its own capacity
storagenode/monitor/monitor.go: uses contact service in place of routing table
storagenode/operator.go: moves operatorconfig from kad into its own setup
storagenode/peer.go: sets up contact service, chore, pingstats and endpoints
satellite/overlay/config.go: changes NodeSelectionConfig.OnlineWindow default to 4hr to allow for accurate repair selection
Removes kademlia setups in:

cmd/storagenode/main.go
cmd/storj-sim/network.go
internal/testplane/planet.go
internal/testplanet/satellite.go
internal/testplanet/storagenode.go
satellite/peer.go
scripts/test-sim-backwards.sh
scripts/testdata/satellite-config.yaml.lock
storagenode/inspector/inspector.go
storagenode/peer.go
storagenode/storagenodedb/database.go
Why: Replacing Kademlia

Please describe the tests:
• internal/testplanet/planet_test.go:

TestBasic: assert that the storagenode can check in with the satellite without any errors
TestContact: test that all nodes get inserted into both satellites' overlay cache during testplanet setup
• satellite/contact/contact_test.go:

TestFetchInfo: Tests that the FetchInfo method returns the correct info
• storagenode/contact/contact_test.go:

TestNodeInfoUpdated: tests that the contact chore updates the node information
TestRequestInfoEndpoint: tests that the Request info endpoint returns the correct info
Please describe the performance impact: Node discovery should be at least slightly more performant since each node connects directly to each satellite and no longer needs to wait for bootstrapping. It probably won't be faster in real time on start up since each node waits a random amount of time (less than 1 hr) to initialize its first connection (jitter).
2019-09-19 15:56:34 -04:00

312 lines
7.6 KiB
Go

// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information
// Package testplanet implements the full network wiring for testing
package testplanet
import (
"context"
"errors"
"io"
"io/ioutil"
"net"
"os"
"path/filepath"
"sync"
"time"
"github.com/zeebo/errs"
"go.uber.org/zap"
"go.uber.org/zap/zaptest"
"golang.org/x/sync/errgroup"
"storj.io/storj/bootstrap"
"storj.io/storj/internal/testidentity"
"storj.io/storj/pkg/identity"
"storj.io/storj/pkg/storj"
"storj.io/storj/satellite"
"storj.io/storj/satellite/overlay"
"storj.io/storj/storagenode"
"storj.io/storj/versioncontrol"
)
// Peer represents one of StorageNode or Satellite
type Peer interface {
ID() storj.NodeID
Addr() string
URL() storj.NodeURL
Local() overlay.NodeDossier
Run(context.Context) error
Close() error
}
// Config describes planet configuration
type Config struct {
SatelliteCount int
StorageNodeCount int
UplinkCount int
Identities *testidentity.Identities
IdentityVersion *storj.IDVersion
Reconfigure Reconfigure
}
// Planet is a full storj system setup.
type Planet struct {
log *zap.Logger
config Config
directory string // TODO: ensure that everything is in-memory to speed things up
started bool
shutdown bool
peers []closablePeer
databases []io.Closer
uplinks []*Uplink
Bootstrap *bootstrap.Peer
VersionControl *versioncontrol.Peer
Satellites []*SatelliteSystem
StorageNodes []*storagenode.Peer
Uplinks []*Uplink
identities *testidentity.Identities
whitelistPath string // TODO: in-memory
run errgroup.Group
cancel func()
}
// SatelliteSystem contains all the processes needed to run a full Satellite setup
type SatelliteSystem struct {
satellite.Peer
}
type closablePeer struct {
peer Peer
ctx context.Context
cancel func()
close sync.Once
err error
}
// Close closes safely the peer.
func (peer *closablePeer) Close() error {
peer.cancel()
peer.close.Do(func() {
peer.err = peer.peer.Close()
})
return peer.err
}
// New creates a new full system with the given number of nodes.
func New(t zaptest.TestingT, satelliteCount, storageNodeCount, uplinkCount int) (*Planet, error) {
var log *zap.Logger
if t == nil {
log = zap.NewNop()
} else {
log = zaptest.NewLogger(t)
}
return NewWithLogger(log, satelliteCount, storageNodeCount, uplinkCount)
}
// NewWithIdentityVersion creates a new full system with the given version for node identities and the given number of nodes.
func NewWithIdentityVersion(t zaptest.TestingT, identityVersion *storj.IDVersion, satelliteCount, storageNodeCount, uplinkCount int) (*Planet, error) {
var log *zap.Logger
if t == nil {
log = zap.NewNop()
} else {
log = zaptest.NewLogger(t)
}
return NewCustom(log, Config{
SatelliteCount: satelliteCount,
StorageNodeCount: storageNodeCount,
UplinkCount: uplinkCount,
IdentityVersion: identityVersion,
})
}
// NewWithLogger creates a new full system with the given number of nodes.
func NewWithLogger(log *zap.Logger, satelliteCount, storageNodeCount, uplinkCount int) (*Planet, error) {
return NewCustom(log, Config{
SatelliteCount: satelliteCount,
StorageNodeCount: storageNodeCount,
UplinkCount: uplinkCount,
})
}
// NewCustom creates a new full system with the specified configuration.
func NewCustom(log *zap.Logger, config Config) (*Planet, error) {
if config.IdentityVersion == nil {
version := storj.LatestIDVersion()
config.IdentityVersion = &version
}
if config.Identities == nil {
config.Identities = testidentity.NewPregeneratedSignedIdentities(*config.IdentityVersion)
}
planet := &Planet{
log: log,
config: config,
identities: config.Identities,
}
var err error
planet.directory, err = ioutil.TempDir("", "planet")
if err != nil {
return nil, err
}
whitelistPath, err := planet.WriteWhitelist(*config.IdentityVersion)
if err != nil {
return nil, err
}
planet.whitelistPath = whitelistPath
planet.VersionControl, err = planet.newVersionControlServer()
if err != nil {
return nil, errs.Combine(err, planet.Shutdown())
}
planet.Bootstrap, err = planet.newBootstrap()
if err != nil {
return nil, errs.Combine(err, planet.Shutdown())
}
planet.Satellites, err = planet.newSatellites(config.SatelliteCount)
if err != nil {
return nil, errs.Combine(err, planet.Shutdown())
}
whitelistedSatellites := make(storj.NodeURLs, 0, len(planet.Satellites))
for _, satellite := range planet.Satellites {
whitelistedSatellites = append(whitelistedSatellites, satellite.URL())
}
planet.StorageNodes, err = planet.newStorageNodes(config.StorageNodeCount, whitelistedSatellites)
if err != nil {
return nil, errs.Combine(err, planet.Shutdown())
}
planet.Uplinks, err = planet.newUplinks("uplink", config.UplinkCount, config.StorageNodeCount)
if err != nil {
return nil, errs.Combine(err, planet.Shutdown())
}
return planet, nil
}
// Start starts all the nodes.
func (planet *Planet) Start(ctx context.Context) {
ctx, cancel := context.WithCancel(ctx)
planet.cancel = cancel
planet.run.Go(func() error {
return planet.VersionControl.Run(ctx)
})
for i := range planet.peers {
peer := &planet.peers[i]
peer.ctx, peer.cancel = context.WithCancel(ctx)
planet.run.Go(func() error {
return peer.peer.Run(peer.ctx)
})
}
for _, peer := range planet.StorageNodes {
peer.Contact.Chore.Loop.TriggerWait()
}
planet.started = true
}
// StopPeer stops a single peer in the planet
func (planet *Planet) StopPeer(peer Peer) error {
for i := range planet.peers {
p := &planet.peers[i]
if p.peer == peer {
return p.Close()
}
}
return errors.New("unknown peer")
}
// Size returns number of nodes in the network
func (planet *Planet) Size() int { return len(planet.uplinks) + len(planet.peers) }
// Shutdown shuts down all the nodes and deletes temporary directories.
func (planet *Planet) Shutdown() error {
if !planet.started {
return errors.New("Start was never called")
}
if planet.shutdown {
panic("double Shutdown")
}
planet.shutdown = true
planet.cancel()
var errlist errs.Group
ctx, cancel := context.WithCancel(context.Background())
go func() {
// TODO: add diagnostics to see what hasn't been properly shut down
timer := time.NewTimer(30 * time.Second)
defer timer.Stop()
select {
case <-timer.C:
panic("planet took too long to shutdown")
case <-ctx.Done():
}
}()
errlist.Add(planet.run.Wait())
cancel()
// shutdown in reverse order
for i := len(planet.uplinks) - 1; i >= 0; i-- {
node := planet.uplinks[i]
errlist.Add(node.Shutdown())
}
for i := len(planet.peers) - 1; i >= 0; i-- {
peer := &planet.peers[i]
errlist.Add(peer.Close())
}
for _, db := range planet.databases {
errlist.Add(db.Close())
}
errlist.Add(planet.VersionControl.Close())
errlist.Add(os.RemoveAll(planet.directory))
return errlist.Err()
}
// Identities returns the identity provider for this planet.
func (planet *Planet) Identities() *testidentity.Identities {
return planet.identities
}
// NewIdentity creates a new identity for a node
func (planet *Planet) NewIdentity() (*identity.FullIdentity, error) {
return planet.identities.NewIdentity()
}
// NewListener creates a new listener
func (planet *Planet) NewListener() (net.Listener, error) {
return net.Listen("tcp", "127.0.0.1:0")
}
// WriteWhitelist writes the pregenerated signer's CA cert to a "CA whitelist", PEM-encoded.
func (planet *Planet) WriteWhitelist(version storj.IDVersion) (string, error) {
whitelistPath := filepath.Join(planet.directory, "whitelist.pem")
signer := testidentity.NewPregeneratedSigner(version)
err := identity.PeerCAConfig{
CertPath: whitelistPath,
}.Save(signer.PeerCA())
return whitelistPath, err
}