storj/pkg/kademlia/peer_discovery.go

252 lines
5.6 KiB
Go
Raw Normal View History

2019-01-24 20:15:10 +00:00
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package kademlia
import (
"context"
2018-12-04 15:46:53 +00:00
"sort"
"sync"
"github.com/zeebo/errs"
"go.uber.org/zap"
"storj.io/storj/pkg/pb"
"storj.io/storj/pkg/storj"
)
type peerDiscovery struct {
log *zap.Logger
dialer *Dialer
self *pb.Node
target storj.NodeID
k int
concurrency int
cond sync.Cond
2018-12-04 15:46:53 +00:00
queue discoveryQueue
}
// ErrMaxRetries is the error class used when a lookup has been retried the
// maximum number of times.
var ErrMaxRetries = errs.Class("max retries exceeded for id:")
func newPeerDiscovery(log *zap.Logger, dialer *Dialer, target storj.NodeID, startingNodes []*pb.Node, k, alpha int, self *pb.Node) *peerDiscovery {
2018-12-04 15:46:53 +00:00
discovery := &peerDiscovery{
log: log,
dialer: dialer,
self: self,
target: target,
k: k,
concurrency: alpha,
cond: sync.Cond{L: &sync.Mutex{}},
queue: *newDiscoveryQueue(target, k),
}
discovery.queue.Insert(startingNodes...)
2018-12-04 15:46:53 +00:00
return discovery
}
func (lookup *peerDiscovery) Run(ctx context.Context) (_ []*pb.Node, err error) {
defer mon.Task()(&ctx)(&err)
if lookup.queue.Unqueried() == 0 {
return nil, nil
2018-12-04 15:46:53 +00:00
}
// protected by `lookup.cond.L`
working := 0
allDone := false
wg := sync.WaitGroup{}
wg.Add(lookup.concurrency)
for i := 0; i < lookup.concurrency; i++ {
go func() {
defer wg.Done()
for {
2018-12-04 15:46:53 +00:00
var next *pb.Node
lookup.cond.L.Lock()
for {
// everything is done, this routine can return
if allDone {
lookup.cond.L.Unlock()
return
}
next = lookup.queue.ClosestUnqueried()
if next != nil {
working++
break
}
// no work, wait until some other routine inserts into the queue
lookup.cond.Wait()
}
lookup.cond.L.Unlock()
neighbors, err := lookup.dialer.Lookup(ctx, lookup.self, *next, lookup.target, lookup.k)
if err != nil {
lookup.queue.QueryFailure(next)
if !isDone(ctx) {
2019-01-02 18:07:49 +00:00
lookup.log.Debug("connecting to node failed",
zap.Any("target", lookup.target),
zap.Any("dial-node", next.Id),
2019-01-02 18:07:49 +00:00
zap.Any("dial-address", next.Address.Address),
zap.Error(err),
)
}
} else {
lookup.queue.QuerySuccess(next, neighbors...)
}
lookup.cond.L.Lock()
working--
allDone = allDone || isDone(ctx) || (working == 0 && lookup.queue.Unqueried() == 0)
lookup.cond.L.Unlock()
lookup.cond.Broadcast()
}
}()
}
wg.Wait()
return lookup.queue.ClosestQueried(), ctx.Err()
}
// isDone reports whether ctx's Done channel is already closed. It polls the
// channel without blocking.
func isDone(ctx context.Context) bool {
	finished := false
	select {
	case <-ctx.Done():
		finished = true
	default:
		// Done is still open (or nil): the context is live.
	}
	return finished
}
2018-12-04 15:46:53 +00:00
// queueState records how far a node in the discovery queue has progressed.
type queueState int

// States a queued node moves through. stateUnqueried is the zero value.
const (
	// stateUnqueried: the node has been inserted but not handed out yet.
	stateUnqueried = queueState(iota)
	// stateQuerying: the node has been handed out by ClosestUnqueried.
	stateQuerying
	// stateSuccess: the node was reported through QuerySuccess.
	stateSuccess
	// stateFailure: the node was reported through QueryFailure.
	stateFailure
)
2018-12-04 15:46:53 +00:00
// discoveryQueue is a limited priority queue for nodes with xor distance
type discoveryQueue struct {
target storj.NodeID
2018-12-04 15:46:53 +00:00
maxLen int
mu sync.Mutex
state map[storj.NodeID]queueState
2018-12-04 15:46:53 +00:00
items []queueItem
}
// queueItem is a node together with its priority in the discovery queue.
type queueItem struct {
	// node is the queued node.
	node *pb.Node
	// priority is the XOR of the queue target and the node id; items are sorted ascending by it.
	priority storj.NodeID
}
// newDiscoveryQueue returns a items with priority based on XOR from targetBytes
func newDiscoveryQueue(target storj.NodeID, size int) *discoveryQueue {
2018-12-04 15:46:53 +00:00
return &discoveryQueue{
target: target,
state: make(map[storj.NodeID]queueState),
2018-12-04 15:46:53 +00:00
maxLen: size,
}
}
// Insert adds nodes into the queue.
func (queue *discoveryQueue) Insert(nodes ...*pb.Node) {
2018-12-04 15:46:53 +00:00
queue.mu.Lock()
defer queue.mu.Unlock()
queue.insert(nodes...)
}
2018-12-04 15:46:53 +00:00
// insert requires the mutex to be locked
func (queue *discoveryQueue) insert(nodes ...*pb.Node) {
2018-12-04 15:46:53 +00:00
for _, node := range nodes {
// TODO: empty node ids should be semantically different from the
// technically valid node id that is all zeros
if node.Id == (storj.NodeID{}) {
2018-12-04 15:46:53 +00:00
continue
}
if _, added := queue.state[node.Id]; added {
continue
2018-12-04 15:46:53 +00:00
}
queue.state[node.Id] = stateUnqueried
2018-12-04 15:46:53 +00:00
queue.items = append(queue.items, queueItem{
node: node,
priority: xorNodeID(queue.target, node.Id),
2018-12-04 15:46:53 +00:00
})
}
sort.Slice(queue.items, func(i, k int) bool {
return queue.items[i].priority.Less(queue.items[k].priority)
})
if len(queue.items) > queue.maxLen {
queue.items = queue.items[:queue.maxLen]
}
}
// ClosestUnqueried hands out the first item in queue order that is still
// unqueried and marks it as being queried. It returns nil when no unqueried
// item is left.
func (queue *discoveryQueue) ClosestUnqueried() *pb.Node {
	queue.mu.Lock()
	defer queue.mu.Unlock()

	for i := range queue.items {
		candidate := queue.items[i].node
		if queue.state[candidate.Id] != stateUnqueried {
			continue
		}
		queue.state[candidate.Id] = stateQuerying
		return candidate
	}
	return nil
}
// ClosestQueried returns the closest queried items in the queue
func (queue *discoveryQueue) ClosestQueried() []*pb.Node {
2018-12-04 15:46:53 +00:00
queue.mu.Lock()
defer queue.mu.Unlock()
rv := make([]*pb.Node, 0, len(queue.items))
for _, item := range queue.items {
if queue.state[item.node.Id] == stateSuccess {
rv = append(rv, item.node)
}
2018-12-04 15:46:53 +00:00
}
return rv
}
// QuerySuccess records that node answered its query and feeds the nodes it
// returned into the queue. Incoming nodes with a zero id, or with an id that
// was inserted before, are ignored by the insert.
func (queue *discoveryQueue) QuerySuccess(node *pb.Node, nodes ...*pb.Node) {
	queue.mu.Lock()
	defer queue.mu.Unlock()

	// mark first, so the answering node is already known when its results are inserted
	answered := node.Id
	queue.state[answered] = stateSuccess

	queue.insert(nodes...)
}
// QueryFailure marks the node as failing query
func (queue *discoveryQueue) QueryFailure(node *pb.Node) {
queue.mu.Lock()
queue.state[node.Id] = stateFailure
queue.mu.Unlock()
2018-12-04 15:46:53 +00:00
}
// Unqueried returns the number of unqueried items in the queue
func (queue *discoveryQueue) Unqueried() (amount int) {
2018-12-04 15:46:53 +00:00
queue.mu.Lock()
defer queue.mu.Unlock()
for _, item := range queue.items {
if queue.state[item.node.Id] == stateUnqueried {
amount++
}
}
return amount
2018-12-04 15:46:53 +00:00
}