Ensure everyone sees everyone else (#1275)

Egon Elbre 2019-02-08 11:25:13 +02:00 committed by GitHub
parent 5a13bdec18
commit 9c1e299f3c
19 changed files with 155 additions and 136 deletions


@@ -124,8 +124,10 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, config Config) (*P
 		return nil, errs.Combine(err, peer.Close())
 	}
 
+	peer.Transport = peer.Transport.WithObservers(peer.Kademlia.RoutingTable)
+
 	// TODO: reduce number of arguments
-	peer.Kademlia.Service, err = kademlia.NewService(peer.Log.Named("kademlia"), self, config.BootstrapNodes(), peer.Identity, config.Alpha, peer.Kademlia.RoutingTable)
+	peer.Kademlia.Service, err = kademlia.NewService(peer.Log.Named("kademlia"), self, config.BootstrapNodes(), peer.Transport, config.Alpha, peer.Kademlia.RoutingTable)
 	if err != nil {
 		return nil, errs.Combine(err, peer.Close())
 	}
@@ -148,7 +150,7 @@ func (peer *Peer) Run(ctx context.Context) error {
 		return ignoreCancel(peer.Kademlia.Service.Bootstrap(ctx))
 	})
 	group.Go(func() error {
-		return ignoreCancel(peer.Kademlia.Service.RunRefresh(ctx))
+		return ignoreCancel(peer.Kademlia.Service.Run(ctx))
 	})
 	group.Go(func() error {
 		// TODO: move the message into Server instead


@@ -124,6 +124,8 @@ func (cycle *Cycle) Run(ctx context.Context, fn func(ctx context.Context) error)
 
 // Close closes all resources associated with it.
 func (cycle *Cycle) Close() {
+	cycle.Stop()
+	<-cycle.stop
 	close(cycle.control)
 }
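
For context, Cycle.Close now stops the ticker loop and waits for it to exit before closing the control channel. A minimal sketch of how a sync2.Cycle is presumably driven, assuming its API matches the calls visible elsewhere in this commit (SetInterval, Start, TriggerWait, Close); the Service type and the interval below are made up:

	package example

	import (
		"context"
		"time"

		"golang.org/x/sync/errgroup"

		"storj.io/storj/internal/sync2"
	)

	// Service owns a single refresh cycle.
	type Service struct {
		Refresh sync2.Cycle
	}

	func NewService() *Service {
		s := &Service{}
		s.Refresh.SetInterval(30 * time.Second) // how often the tick callback fires
		return s
	}

	// Run blocks until ctx is canceled or the cycle stops.
	func (s *Service) Run(ctx context.Context) error {
		var group errgroup.Group
		// Start ties the cycle to the group and invokes the callback on every tick;
		// tests can force an immediate tick with s.Refresh.TriggerWait().
		s.Refresh.Start(ctx, &group, func(ctx context.Context) error {
			// one refresh pass; returning nil keeps the cycle running
			return nil
		})
		return group.Wait()
	}

	// Close stops the loop and waits for it to exit, per Cycle.Close above.
	func (s *Service) Close() error {
		s.Refresh.Close()
		return nil
	}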


@@ -214,12 +214,54 @@ func (planet *Planet) Start(ctx context.Context) {
 	planet.started = true
 
+	planet.Bootstrap.Kademlia.Service.WaitForBootstrap()
 	for _, peer := range planet.Satellites {
 		peer.Kademlia.Service.WaitForBootstrap()
 	}
 	for _, peer := range planet.StorageNodes {
 		peer.Kademlia.Service.WaitForBootstrap()
 	}
+
+	planet.Reconnect(ctx)
+}
+
+// Reconnect reconnects all nodes with each other.
+func (planet *Planet) Reconnect(ctx context.Context) {
+	log := planet.log.Named("reconnect")
+
+	var group errgroup.Group
+
+	// TODO: instead of pinging try to use Lookups or natural discovery to ensure
+	// everyone finds everyone else
+	for _, storageNode := range planet.StorageNodes {
+		storageNode := storageNode
+		group.Go(func() error {
+			_, err := storageNode.Kademlia.Service.Ping(ctx, planet.Bootstrap.Local())
+			if err != nil {
+				log.Error("storage node did not find bootstrap", zap.Error(err))
+			}
+			return nil
+		})
+	}
+
+	for _, satellite := range planet.Satellites {
+		satellite := satellite
+		group.Go(func() error {
+			for _, storageNode := range planet.StorageNodes {
+				_, err := satellite.Kademlia.Service.Ping(ctx, storageNode.Local())
+				if err != nil {
+					log.Error("satellite did not find storage node", zap.Error(err))
+				}
+			}
+			satellite.Discovery.Service.Refresh.TriggerWait()
+			return nil
+		})
+	}
+
+	_ = group.Wait() // none of the goroutines return an error
 }
 
 // StopPeer stops a single peer in the planet
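
Because Start now waits for bootstrap, pings every node, and triggers each satellite's discovery refresh, tests can use the planet as soon as Start returns; the hunks below simply delete their time.Sleep calls. A rough sketch of the resulting test shape, reusing helpers that appear elsewhere in this commit (testplanet.Run, DialOverlay); the test name and counts are placeholders:

	package example_test

	import (
		"testing"

		"github.com/stretchr/testify/require"

		"storj.io/storj/internal/testcontext"
		"storj.io/storj/internal/testplanet"
	)

	func TestExample(t *testing.T) {
		testplanet.Run(t, testplanet.Config{
			SatelliteCount: 1, StorageNodeCount: 4, UplinkCount: 1,
		}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
			// no time.Sleep needed: every node already sees every other node
			oc, err := planet.Uplinks[0].DialOverlay(planet.Satellites[0])
			require.NoError(t, err)
			_ = oc
		})
	}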
@@ -298,6 +340,7 @@ func (planet *Planet) newUplinks(prefix string, count, storageNodeCount int) ([]
 
 // newSatellites initializes satellites
 func (planet *Planet) newSatellites(count int) ([]*satellite.Peer, error) {
+	// TODO: move into separate file
 	var xs []*satellite.Peer
 	defer func() {
 		for _, x := range xs {
@@ -355,7 +398,6 @@ func (planet *Planet) newSatellites(count int) ([]*satellite.Peer, error) {
 				},
 			},
 			Overlay: overlay.Config{
-				RefreshInterval: 30 * time.Second,
 				Node: overlay.NodeSelectionConfig{
 					UptimeRatio: 0,
 					UptimeCount: 0,
@@ -425,6 +467,7 @@ func (planet *Planet) newSatellites(count int) ([]*satellite.Peer, error) {
 
 // newStorageNodes initializes storage nodes
 func (planet *Planet) newStorageNodes(count int) ([]*storagenode.Peer, error) {
+	// TODO: move into separate file
 	var xs []*storagenode.Peer
 	defer func() {
 		for _, x := range xs {
@@ -508,6 +551,7 @@ func (planet *Planet) newStorageNodes(count int) ([]*storagenode.Peer, error) {
 
 // newBootstrap initializes the bootstrap node
 func (planet *Planet) newBootstrap() (peer *bootstrap.Peer, err error) {
+	// TODO: move into separate file
 	defer func() {
 		planet.peers = append(planet.peers, closablePeer{peer: peer})
 	}()


@@ -6,7 +6,6 @@ package testplanet_test
 import (
 	"crypto/rand"
 	"testing"
-	"time"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -25,7 +24,6 @@ func TestUploadDownload(t *testing.T) {
 	defer ctx.Check(planet.Shutdown)
 
 	planet.Start(ctx)
-	time.Sleep(2 * time.Second)
 
 	expectedData := make([]byte, 5*memory.MiB)
 	_, err = rand.Read(expectedData)


@@ -22,8 +22,6 @@ func TestQuery(t *testing.T) {
 	atRest := float64(5000)
 	bw := []int64{1000, 2000, 3000, 4000}
 
-	time.Sleep(time.Second * 2)
-
 	nodeData, bwTotals := createData(planet, atRest, bw)
 
 	// Set timestamp back by the number of days we want to save


@@ -9,7 +9,6 @@ import (
 	"math/big"
 	"reflect"
 	"testing"
-	"time"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -27,9 +26,6 @@ func TestAuditSegment(t *testing.T) {
 	testplanet.Run(t, testplanet.Config{
 		SatelliteCount: 1, StorageNodeCount: 4, UplinkCount: 1,
 	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
-		// we wait a second for all the nodes to complete bootstrapping off the satellite
-		time.Sleep(2 * time.Second)
-
 		type pathCount struct {
 			path  storj.Path
 			count int


@@ -10,7 +10,9 @@ import (
 	"github.com/zeebo/errs"
 	"go.uber.org/zap"
+	"golang.org/x/sync/errgroup"
 
+	"storj.io/storj/internal/sync2"
 	"storj.io/storj/pkg/kademlia"
 	"storj.io/storj/pkg/overlay"
 	"storj.io/storj/pkg/statdb"
@@ -38,69 +40,68 @@ type Discovery struct {
 	cache  *overlay.Cache
 	kad    *kademlia.Kademlia
 	statdb statdb.DB
-	config Config
 
 	// refreshOffset tracks the offset of the current refresh cycle
 	refreshOffset int64
+	refreshLimit  int
+
+	Refresh   sync2.Cycle
+	Graveyard sync2.Cycle
+	Discovery sync2.Cycle
 }
 
 // New returns a new discovery service.
 func New(logger *zap.Logger, ol *overlay.Cache, kad *kademlia.Kademlia, stat statdb.DB, config Config) *Discovery {
-	return &Discovery{
+	discovery := &Discovery{
 		log:    logger,
 		cache:  ol,
 		kad:    kad,
 		statdb: stat,
-		config: config,
 
 		refreshOffset: 0,
+		refreshLimit:  config.RefreshLimit,
 	}
-}
 
-// NewDiscovery Returns a new Discovery instance with cache, kad, and statdb loaded on
-func NewDiscovery(logger *zap.Logger, ol *overlay.Cache, kad *kademlia.Kademlia, stat statdb.DB, config Config) *Discovery {
-	return &Discovery{
-		log:    logger,
-		cache:  ol,
-		kad:    kad,
-		statdb: stat,
-		config: config,
-	}
+	discovery.Refresh.SetInterval(config.RefreshInterval)
+	discovery.Graveyard.SetInterval(config.GraveyardInterval)
+	discovery.Discovery.SetInterval(config.DiscoveryInterval)
+
+	return discovery
 }
 
 // Close closes resources
-func (discovery *Discovery) Close() error { return nil }
+func (discovery *Discovery) Close() error {
+	discovery.Refresh.Close()
+	discovery.Graveyard.Close()
+	discovery.Discovery.Close()
+	return nil
+}
 
 // Run runs the discovery service
 func (discovery *Discovery) Run(ctx context.Context) error {
-	refresh := time.NewTicker(discovery.config.RefreshInterval)
-	graveyard := time.NewTicker(discovery.config.GraveyardInterval)
-	discover := time.NewTicker(discovery.config.DiscoveryInterval)
-	defer refresh.Stop()
-	defer graveyard.Stop()
-	defer discover.Stop()
-
-	for {
-		select {
-		case <-refresh.C:
-			err := discovery.refresh(ctx)
-			if err != nil {
-				discovery.log.Error("error with cache refresh: ", zap.Error(err))
-			}
-		case <-discover.C:
-			err := discovery.discover(ctx)
-			if err != nil {
-				discovery.log.Error("error with cache discovery: ", zap.Error(err))
-			}
-		case <-graveyard.C:
-			err := discovery.searchGraveyard(ctx)
-			if err != nil {
-				discovery.log.Error("graveyard resurrection failed: ", zap.Error(err))
-			}
-		case <-ctx.Done():
-			return ctx.Err()
-		}
-	}
+	var group errgroup.Group
+
+	discovery.Refresh.Start(ctx, &group, func(ctx context.Context) error {
+		err := discovery.refresh(ctx)
+		if err != nil {
+			discovery.log.Error("error with cache refresh: ", zap.Error(err))
+		}
+		return nil
+	})
+	discovery.Graveyard.Start(ctx, &group, func(ctx context.Context) error {
+		err := discovery.searchGraveyard(ctx)
+		if err != nil {
+			discovery.log.Error("graveyard resurrection failed: ", zap.Error(err))
+		}
+		return nil
+	})
+	discovery.Discovery.Start(ctx, &group, func(ctx context.Context) error {
+		err := discovery.discover(ctx)
+		if err != nil {
+			discovery.log.Error("error with cache discovery: ", zap.Error(err))
+		}
+		return nil
+	})
+
+	return group.Wait()
 }
 
 // refresh updates the cache db with the current DHT.
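
The rewritten service reads four knobs from its Config: RefreshInterval, GraveyardInterval, DiscoveryInterval, and RefreshLimit. The Config definition itself is not part of this diff, so the sketch below only fixes the shape those calls imply; the field types are assumed and the usual help/default struct tags are omitted:

	package discovery

	import "time"

	// Config (sketch): the fields read by New above.
	type Config struct {
		RefreshInterval   time.Duration // interval of the cache refresh cycle
		GraveyardInterval time.Duration // interval of graveyard resurrection attempts
		DiscoveryInterval time.Duration // interval of new-node discovery
		RefreshLimit      int           // page size used by cache.Paginate during refresh
	}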
@@ -114,7 +115,7 @@ func (discovery *Discovery) refresh(ctx context.Context) error {
 		}
 	}
 
-	list, more, err := discovery.cache.Paginate(ctx, discovery.refreshOffset, discovery.config.RefreshLimit)
+	list, more, err := discovery.cache.Paginate(ctx, discovery.refreshOffset, discovery.refreshLimit)
 	if err != nil {
 		return Error.Wrap(err)
 	}


@@ -5,7 +5,6 @@ package discovery_test
 import (
 	"testing"
-	"time"
 
 	"github.com/stretchr/testify/assert"
@@ -17,8 +16,6 @@ func TestCache_Refresh(t *testing.T) {
 	testplanet.Run(t, testplanet.Config{
 		SatelliteCount: 1, StorageNodeCount: 10, UplinkCount: 0,
 	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
-		time.Sleep(5 * time.Second)
-
 		satellite := planet.Satellites[0]
 		for _, storageNode := range planet.StorageNodes {
 			node, err := satellite.Overlay.Service.Get(ctx, storageNode.ID())


@@ -6,7 +6,6 @@ package kademlia_test
 import (
 	"fmt"
 	"testing"
-	"time"
 
 	"github.com/zeebo/errs"
 	"go.uber.org/zap/zaptest"
@@ -23,7 +22,6 @@ func TestDialer(t *testing.T) {
 	testplanet.Run(t, testplanet.Config{
 		SatelliteCount: 1, StorageNodeCount: 4, UplinkCount: 3,
 	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
-		time.Sleep(2 * time.Second)
 
 		expectedKademliaEntries := 1 + len(planet.Satellites) + len(planet.StorageNodes)
 
 		// TODO: also use satellites


@@ -47,22 +47,21 @@ type Kademlia struct {
 	routingTable   *RoutingTable
 	bootstrapNodes []pb.Node
 	dialer         *Dialer
-	identity       *identity.FullIdentity
 
 	lookups sync2.WorkGroup
 
 	bootstrapFinished sync2.Fence
+
+	RefreshBuckets sync2.Cycle
 }
 
 // NewService returns a newly configured Kademlia instance
-func NewService(log *zap.Logger, self pb.Node, bootstrapNodes []pb.Node, identity *identity.FullIdentity, alpha int, rt *RoutingTable) (*Kademlia, error) {
+func NewService(log *zap.Logger, self pb.Node, bootstrapNodes []pb.Node, transport transport.Client, alpha int, rt *RoutingTable) (*Kademlia, error) {
 	k := &Kademlia{
 		log:            log,
 		alpha:          alpha,
 		routingTable:   rt,
 		bootstrapNodes: bootstrapNodes,
-		identity:       identity,
-		dialer:         NewDialer(log.Named("dialer"), transport.NewClient(identity, rt)),
+		dialer:         NewDialer(log.Named("dialer"), transport),
 	}
 
 	return k, nil
@@ -283,25 +282,21 @@ func (k *Kademlia) Seen() []*pb.Node {
 	return nodes
 }
 
-// RunRefresh occasionally refreshes stale kad buckets
-func (k *Kademlia) RunRefresh(ctx context.Context) error {
+// Run occasionally refreshes stale kad buckets
+func (k *Kademlia) Run(ctx context.Context) error {
 	if !k.lookups.Start() {
 		return context.Canceled
 	}
 	defer k.lookups.Done()
 
-	ticker := time.NewTicker(5 * time.Minute)
-	for {
-		if err := k.refresh(ctx, time.Minute); err != nil {
+	k.RefreshBuckets.SetInterval(5 * time.Minute)
+	return k.RefreshBuckets.Run(ctx, func(ctx context.Context) error {
+		err := k.refresh(ctx, time.Minute)
+		if err != nil {
 			k.log.Warn("bucket refresh failed", zap.Error(err))
 		}
-		select {
-		case <-ticker.C:
-		case <-ctx.Done():
-			ticker.Stop()
-			return ctx.Err()
-		}
-	}
+		return nil
+	})
 }
 
 // refresh updates each Kademlia bucket not contacted in the last hour


@@ -29,6 +29,7 @@ import (
 	"storj.io/storj/pkg/identity"
 	"storj.io/storj/pkg/pb"
 	"storj.io/storj/pkg/storj"
+	"storj.io/storj/pkg/transport"
 	"storj.io/storj/storage/teststore"
 )
@@ -543,5 +544,5 @@ func newKademlia(log *zap.Logger, nodeType pb.NodeType, bootstrapNodes []pb.Node
 		return nil, BootstrapErr.Wrap(err)
 	}
 
-	return NewService(log, self, bootstrapNodes, identity, alpha, rt)
+	return NewService(log, self, bootstrapNodes, transport.NewClient(identity, rt), alpha, rt)
 }


@@ -10,7 +10,6 @@ import (
 	"io"
 	mathrand "math/rand"
 	"testing"
-	"time"
 
 	"github.com/stretchr/testify/assert"
@@ -120,9 +119,6 @@ func TestGetObject(t *testing.T) {
 
 func TestGetObjectStream(t *testing.T) {
 	runTest(t, func(ctx context.Context, planet *testplanet.Planet, db *kvmetainfo.DB, buckets buckets.Store, streams streams.Store) {
-		// we wait a second for all the nodes to complete bootstrapping off the satellite
-		time.Sleep(2 * time.Second)
-
 		data := make([]byte, 32*memory.KB)
 		_, err := rand.Read(data)
 		if !assert.NoError(t, err) {


@@ -83,8 +83,6 @@ func TestUploadDownload(t *testing.T) {
 	planet.Start(ctx)
 
-	time.Sleep(2 * time.Second)
-
 	// create identity for gateway
 	ca, err := testidentity.NewTestCA(ctx)
 	assert.NoError(t, err)


@@ -5,7 +5,6 @@ package overlay_test
 import (
 	"testing"
-	"time"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -23,9 +22,6 @@ func TestChoose(t *testing.T) {
 	testplanet.Run(t, testplanet.Config{
 		SatelliteCount: 1, StorageNodeCount: 8, UplinkCount: 1,
 	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
-		// we wait a second for all the nodes to complete bootstrapping off the satellite
-		time.Sleep(2 * time.Second)
-
 		oc, err := planet.Uplinks[0].DialOverlay(planet.Satellites[0])
 		require.NoError(t, err)
@@ -63,9 +59,6 @@ func TestLookup(t *testing.T) {
 	testplanet.Run(t, testplanet.Config{
 		SatelliteCount: 1, StorageNodeCount: 4, UplinkCount: 1,
 	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
-		// we wait a second for all the nodes to complete bootstrapping off the satellite
-		time.Sleep(2 * time.Second)
-
 		oc, err := planet.Uplinks[0].DialOverlay(planet.Satellites[0])
 		require.NoError(t, err)
@@ -105,9 +98,6 @@ func TestBulkLookup(t *testing.T) {
 	testplanet.Run(t, testplanet.Config{
 		SatelliteCount: 1, StorageNodeCount: 4, UplinkCount: 1,
 	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
-		// we wait a second for all the nodes to complete bootstrapping off the satellite
-		time.Sleep(2 * time.Second)
-
 		oc, err := planet.Uplinks[0].DialOverlay(planet.Satellites[0])
 		require.NoError(t, err)
@@ -143,9 +133,6 @@ func TestBulkLookupV2(t *testing.T) {
 	testplanet.Run(t, testplanet.Config{
 		SatelliteCount: 1, StorageNodeCount: 4, UplinkCount: 1,
 	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
-		// we wait a second for all the nodes to complete bootstrapping off the satellite
-		time.Sleep(2 * time.Second)
-
 		oc, err := planet.Uplinks[0].DialOverlay(planet.Satellites[0])
 		require.NoError(t, err)


@@ -5,7 +5,6 @@ package overlay
 import (
 	"strings"
-	"time"
 
 	"github.com/zeebo/errs"
 	monkit "gopkg.in/spacemonkeygo/monkit.v2"
@@ -23,8 +22,7 @@ var (
 // Config is a configuration struct for everything you need to start the
 // Overlay cache responsibility.
 type Config struct {
-	RefreshInterval time.Duration `help:"the interval at which the cache refreshes itself in seconds" default:"1s"`
-	Node            NodeSelectionConfig
+	Node NodeSelectionConfig
 }
 
 // LookupConfig is a configuration struct for querying the overlay cache with one or more node IDs


@@ -5,7 +5,6 @@ package overlay_test
 import (
 	"testing"
-	"time"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -22,9 +21,6 @@ func TestServer(t *testing.T) {
 	testplanet.Run(t, testplanet.Config{
 		SatelliteCount: 1, StorageNodeCount: 4, UplinkCount: 1,
 	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
-		// we wait a second for all the nodes to complete bootstrapping off the satellite
-		time.Sleep(2 * time.Second)
-
 		satellite := planet.Satellites[0]
 		server := satellite.Overlay.Endpoint
 		// TODO: handle cleanup
@@ -67,9 +63,6 @@ func TestNodeSelection(t *testing.T) {
 	var err error
 	satellite := planet.Satellites[0]
 
-	// we wait a second for all the nodes to complete bootstrapping off the satellite
-	time.Sleep(2 * time.Second)
-
 	// This sets a reputable audit count for a certain number of nodes.
 	for i, node := range planet.StorageNodes {
 		for k := 0; k < i; k++ {


@@ -36,6 +36,7 @@ type Client interface {
 	DialNode(ctx context.Context, node *pb.Node, opts ...grpc.DialOption) (*grpc.ClientConn, error)
 	DialAddress(ctx context.Context, address string, opts ...grpc.DialOption) (*grpc.ClientConn, error)
 	Identity() *identity.FullIdentity
+	WithObservers(obs ...Observer) *Transport
 }
 
 // Transport interface structure
@@ -109,6 +110,14 @@ func (transport *Transport) Identity() *identity.FullIdentity {
 	return transport.identity
 }
 
+// WithObservers returns a new transport including the listed observers.
+func (transport *Transport) WithObservers(obs ...Observer) *Transport {
+	tr := &Transport{identity: transport.identity}
+	tr.observers = append(tr.observers, transport.observers...)
+	tr.observers = append(tr.observers, obs...)
+	return tr
+}
+
 func alertFail(ctx context.Context, obs []Observer, node *pb.Node, err error) {
 	for _, o := range obs {
 		o.ConnFailure(ctx, node, err)
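
WithObservers copies the transport rather than mutating it: the returned *Transport carries the existing observers plus the new ones, while callers holding the original keep a plain client. This is what lets each peer below hand Kademlia a transport that feeds connection outcomes into the routing table. A small sketch of the call pattern; the helper name is made up:

	package example

	import "storj.io/storj/pkg/transport"

	// withRoutingObserver returns a client that also notifies rt about connection
	// outcomes; tc itself is left untouched. rt stands in for
	// peer.Kademlia.RoutingTable, which implements transport.Observer.
	func withRoutingObserver(tc transport.Client, rt transport.Observer) transport.Client {
		return tc.WithObservers(rt)
	}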


@@ -198,6 +198,28 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, config *Config) (*
 		}
 	}
 
+	{ // setup overlay
+		config := config.Overlay
+		peer.Overlay.Service = overlay.NewCache(peer.DB.OverlayCache(), peer.DB.StatDB())
+
+		// TODO: should overlay service be wired up to transport?
+
+		nodeSelectionConfig := &overlay.NodeSelectionConfig{
+			UptimeCount:           config.Node.UptimeCount,
+			UptimeRatio:           config.Node.UptimeRatio,
+			AuditSuccessRatio:     config.Node.AuditSuccessRatio,
+			AuditCount:            config.Node.AuditCount,
+			NewNodeAuditThreshold: config.Node.NewNodeAuditThreshold,
+			NewNodePercentage:     config.Node.NewNodePercentage,
+		}
+
+		peer.Overlay.Endpoint = overlay.NewServer(peer.Log.Named("overlay:endpoint"), peer.Overlay.Service, nodeSelectionConfig)
+		pb.RegisterOverlayServer(peer.Public.Server.GRPC(), peer.Overlay.Endpoint)
+
+		peer.Overlay.Inspector = overlay.NewInspector(peer.Overlay.Service)
+		pb.RegisterOverlayInspectorServer(peer.Public.Server.GRPC(), peer.Overlay.Inspector)
+	}
+
 	{ // setup kademlia
 		config := config.Kademlia
 		// TODO: move this setup logic into kademlia package
@@ -236,10 +258,12 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, config *Config) (*
 			if err != nil {
 				return nil, errs.Combine(err, peer.Close())
 			}
+
+			peer.Transport = peer.Transport.WithObservers(peer.Kademlia.RoutingTable)
 		}
 
 		// TODO: reduce number of arguments
-		peer.Kademlia.Service, err = kademlia.NewService(peer.Log.Named("kademlia"), self, config.BootstrapNodes(), peer.Identity, config.Alpha, peer.Kademlia.RoutingTable)
+		peer.Kademlia.Service, err = kademlia.NewService(peer.Log.Named("kademlia"), self, config.BootstrapNodes(), peer.Transport, config.Alpha, peer.Kademlia.RoutingTable)
 		if err != nil {
 			return nil, errs.Combine(err, peer.Close())
 		}
@@ -251,26 +275,6 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, config *Config) (*
 		pb.RegisterKadInspectorServer(peer.Public.Server.GRPC(), peer.Kademlia.Inspector)
 	}
 
-	{ // setup overlay
-		config := config.Overlay
-		peer.Overlay.Service = overlay.NewCache(peer.DB.OverlayCache(), peer.DB.StatDB())
-
-		nodeSelectionConfig := &overlay.NodeSelectionConfig{
-			UptimeCount:           config.Node.UptimeCount,
-			UptimeRatio:           config.Node.UptimeRatio,
-			AuditSuccessRatio:     config.Node.AuditSuccessRatio,
-			AuditCount:            config.Node.AuditCount,
-			NewNodeAuditThreshold: config.Node.NewNodeAuditThreshold,
-			NewNodePercentage:     config.Node.NewNodePercentage,
-		}
-
-		peer.Overlay.Endpoint = overlay.NewServer(peer.Log.Named("overlay:endpoint"), peer.Overlay.Service, nodeSelectionConfig)
-		pb.RegisterOverlayServer(peer.Public.Server.GRPC(), peer.Overlay.Endpoint)
-
-		peer.Overlay.Inspector = overlay.NewInspector(peer.Overlay.Service)
-		pb.RegisterOverlayInspectorServer(peer.Public.Server.GRPC(), peer.Overlay.Inspector)
-	}
-
 	{ // setup reputation
 		// TODO: find better structure with overlay
 		peer.Reputation.Inspector = statdb.NewInspector(peer.DB.StatDB())
@@ -385,7 +389,7 @@ func (peer *Peer) Run(ctx context.Context) error {
 		return ignoreCancel(peer.Kademlia.Service.Bootstrap(ctx))
 	})
 	group.Go(func() error {
-		return ignoreCancel(peer.Kademlia.Service.RunRefresh(ctx))
+		return ignoreCancel(peer.Kademlia.Service.Run(ctx))
 	})
 	group.Go(func() error {
 		return ignoreCancel(peer.Discovery.Service.Run(ctx))
@@ -464,13 +468,6 @@ func (peer *Peer) Close() error {
 		errlist.Add(peer.Discovery.Service.Close())
 	}
 
-	if peer.Overlay.Endpoint != nil {
-		errlist.Add(peer.Overlay.Endpoint.Close())
-	}
-	if peer.Overlay.Service != nil {
-		errlist.Add(peer.Overlay.Service.Close())
-	}
-
 	// TODO: add kademlia.Endpoint for consistency
 	if peer.Kademlia.Service != nil {
 		errlist.Add(peer.Kademlia.Service.Close())
@@ -479,6 +476,13 @@ func (peer *Peer) Close() error {
 		errlist.Add(peer.Kademlia.RoutingTable.Close())
 	}
 
+	if peer.Overlay.Endpoint != nil {
+		errlist.Add(peer.Overlay.Endpoint.Close())
+	}
+	if peer.Overlay.Service != nil {
+		errlist.Add(peer.Overlay.Service.Close())
+	}
+
 	if peer.Kademlia.ndb != nil || peer.Kademlia.kdb != nil {
 		errlist.Add(peer.Kademlia.kdb.Close())
 		errlist.Add(peer.Kademlia.ndb.Close())


@@ -142,8 +142,10 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, config Config) (*P
 		return nil, errs.Combine(err, peer.Close())
 	}
 
+	peer.Transport = peer.Transport.WithObservers(peer.Kademlia.RoutingTable)
+
 	// TODO: reduce number of arguments
-	peer.Kademlia.Service, err = kademlia.NewService(peer.Log.Named("kademlia"), self, config.BootstrapNodes(), peer.Identity, config.Alpha, peer.Kademlia.RoutingTable)
+	peer.Kademlia.Service, err = kademlia.NewService(peer.Log.Named("kademlia"), self, config.BootstrapNodes(), peer.Transport, config.Alpha, peer.Kademlia.RoutingTable)
 	if err != nil {
 		return nil, errs.Combine(err, peer.Close())
 	}
@@ -191,7 +193,7 @@ func (peer *Peer) Run(ctx context.Context) error {
 		return ignoreCancel(peer.Kademlia.Service.Bootstrap(ctx))
 	})
 	group.Go(func() error {
-		return ignoreCancel(peer.Kademlia.Service.RunRefresh(ctx))
+		return ignoreCancel(peer.Kademlia.Service.Run(ctx))
 	})
 	group.Go(func() error {
 		return ignoreCancel(peer.Agreements.Sender.Run(ctx))