Creates Accounting Pkg to tally at rest node storage (#568)

* creates accounting package with tally service

* adds cancel on context

* test online nodes
This commit is contained in:
Jennifer Li Johnson 2018-11-08 11:18:28 -05:00 committed by GitHub
parent 320c93368a
commit e678e52229
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 341 additions and 33 deletions

15
pkg/accounting/common.go Normal file
View File

@ -0,0 +1,15 @@
// Copyright (C) 2018 Storj Labs, Inc.
// See LICENSE for copying information.
package accounting
import (
"github.com/zeebo/errs"
monkit "gopkg.in/spacemonkeygo/monkit.v2"
)
// Error is the error class used to wrap failures reported by this package.
var Error = errs.Class("tally error")

// mon is the package-level monkit scope used to instrument tasks.
var mon = monkit.Package()

47
pkg/accounting/config.go Normal file
View File

@ -0,0 +1,47 @@
// Copyright (C) 2018 Storj Labs, Inc.
// See LICENSE for copying information.
package accounting
import (
"context"
"time"
"go.uber.org/zap"
"storj.io/storj/pkg/kademlia"
"storj.io/storj/pkg/overlay"
"storj.io/storj/pkg/pointerdb"
"storj.io/storj/pkg/provider"
)
// Config contains the configurable values for the accounting tally service.
type Config struct {
	// Interval is handed to newTally as the period between tally passes.
	Interval time.Duration `help:"how frequently tally should run" default:"30s"`
}
// initialize builds a Tally from the pointerdb, overlay and kademlia
// instances loaded from ctx, using the configured Interval and a limit of 0.
// The returned error is always nil.
func (c Config) initialize(ctx context.Context) (Tally, error) {
	var (
		pdb = pointerdb.LoadFromContext(ctx)
		ovl = overlay.LoadServerFromContext(ctx)
		kad = kademlia.LoadFromContext(ctx)
	)
	return newTally(pdb, ovl, kad, 0, zap.L(), c.Interval), nil
}
// Run starts the tally loop in a background goroutine and then blocks in
// server.Run, returning whatever server.Run returns.
//
// Both the tally goroutine and the server receive the same derived,
// cancellable context. The context is canceled when the tally loop returns an
// error and, additionally, when Run itself returns, so the tally goroutine
// does not outlive the server.
func (c Config) Run(ctx context.Context, server *provider.Provider) (err error) {
	tally, err := c.initialize(ctx)
	if err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(ctx)
	// Always release the derived context; without this the tally goroutine
	// keeps running after the server stops until the parent ctx is canceled.
	defer cancel()

	go func() {
		if err := tally.Run(ctx); err != nil {
			zap.L().Error("Error running tally", zap.Error(err))
			// Log first, then cancel the shared context.
			cancel()
		}
	}()

	return server.Run(ctx)
}

162
pkg/accounting/tally.go Normal file
View File

@ -0,0 +1,162 @@
// Copyright (C) 2018 Storj Labs, Inc.
// See LICENSE for copying information.
package accounting
import (
"context"
"time"
"github.com/gogo/protobuf/proto"
"go.uber.org/zap"
"storj.io/storj/pkg/dht"
"storj.io/storj/pkg/kademlia"
"storj.io/storj/pkg/node"
"storj.io/storj/pkg/pb"
"storj.io/storj/pkg/pointerdb"
"storj.io/storj/pkg/provider"
"storj.io/storj/pkg/utils"
"storj.io/storj/storage"
)
// Tally is the service that adds up storage node data usage.
type Tally interface {
	// Run executes the service using the supplied context and returns an
	// error describing why it stopped.
	Run(context.Context) error
}
// tally is the package's implementation of Tally.
type tally struct {
pointerdb *pointerdb.Server // iterated by identifyActiveNodes to walk stored pointers
overlay pb.OverlayServer // queried via BulkLookup by onlineNodes
kademlia *kademlia.Kademlia // supplies the routing table and is passed to node.NewNodeClient
limit int // max items handled per iterate callback; values <= 0 or above storage.LookupLimit fall back to storage.LookupLimit
logger *zap.Logger // used for debug logging in identifyActiveNodes
ticker *time.Ticker // Run waits on ticker.C between passes
}
// newTally assembles a tally from its dependencies.
//
// interval is the period of the ticker that Run waits on between passes. It
// must be positive: time.NewTicker panics otherwise. The ticker has to be
// created here because Run reads t.ticker.C and would dereference a nil
// *time.Ticker if the field were left unset.
func newTally(pointerdb *pointerdb.Server, overlay pb.OverlayServer, kademlia *kademlia.Kademlia, limit int, logger *zap.Logger, interval time.Duration) *tally {
	return &tally{
		pointerdb: pointerdb,
		overlay:   overlay,
		kademlia:  kademlia,
		limit:     limit,
		logger:    logger,
		ticker:    time.NewTicker(interval),
	}
}
// Run repeatedly calls identifyActiveNodes, logging (but not returning) any
// failure, and then waits for either the next tick or cancellation of ctx.
// It only returns once ctx is done, with ctx.Err().
func (t *tally) Run(ctx context.Context) (err error) {
	defer mon.Task()(&ctx)(&err)

	for {
		if err = t.identifyActiveNodes(ctx); err != nil {
			zap.L().Error("Collector failed", zap.Error(err))
		}

		select {
		case <-ctx.Done():
			// canceled via context: stop the loop
			return ctx.Err()
		case <-t.ticker.C:
			// next interval reached: run another pass
		}
	}
}
// identifyActiveNodes iterates through pointerdb and, for every pointer that
// references remote pieces, looks up which of the piece-holding nodes are
// online and hands them to tallyAtRestStorage.
//
// Pointers without remote pieces are skipped: there is no node to tally for
// them, and reading pointer.Remote directly would panic when it is nil.
//
// Errors from the routing table, the node client, unmarshalling and the
// overlay lookup are wrapped in Error and abort the iteration.
func (t *tally) identifyActiveNodes(ctx context.Context) (err error) {
	defer mon.Task()(&ctx)(&err)

	rt, err := t.kademlia.GetRoutingTable(ctx)
	if err != nil {
		return Error.Wrap(err)
	}
	self := rt.Local()
	// TODO: an empty identity is used here; confirm whether the node client
	// needs a populated one.
	identity := &provider.FullIdentity{}
	client, err := node.NewNodeClient(identity, self, t.kademlia)
	if err != nil {
		return Error.Wrap(err)
	}

	t.logger.Debug("entering pointerdb iterate")
	err = t.pointerdb.Iterate(ctx, &pb.IterateRequest{Recurse: true},
		func(it storage.Iterator) error {
			// Clamp the per-callback item budget to storage.LookupLimit.
			lim := t.limit
			if lim <= 0 || lim > storage.LookupLimit {
				lim = storage.LookupLimit
			}

			var item storage.ListItem
			for ; lim > 0 && it.Next(&item); lim-- {
				pointer := &pb.Pointer{}
				if err := proto.Unmarshal(item.Value, pointer); err != nil {
					return Error.Wrap(err)
				}

				// The generated getters are nil-safe, so a pointer with no
				// remote segment yields an empty piece list instead of a panic.
				pieces := pointer.GetRemote().GetRemotePieces()
				if len(pieces) == 0 {
					continue
				}

				nodeIDs := make([]dht.NodeID, 0, len(pieces))
				for _, p := range pieces {
					nodeIDs = append(nodeIDs, node.IDFromString(p.NodeId))
				}

				online, err := t.onlineNodes(ctx, nodeIDs)
				if err != nil {
					return Error.Wrap(err)
				}

				// NOTE(review): one goroutine is started per pointer and
				// nothing waits for them; consider bounding or joining them.
				go t.tallyAtRestStorage(ctx, pointer, online, client)
			}
			return nil
		},
	)
	return err
}
// onlineNodes performs a bulk overlay lookup for nodeIDs and returns the
// nodes whose lookup result is non-nil, in response order. If the lookup
// itself fails, it returns an empty slice together with the error.
func (t *tally) onlineNodes(ctx context.Context, nodeIDs []dht.NodeID) (online []*pb.Node, err error) {
	requests := utils.NodeIDsToLookupRequests(nodeIDs)
	responses, err := t.overlay.BulkLookup(ctx, requests)
	if err != nil {
		return []*pb.Node{}, err
	}

	for _, found := range utils.LookupResponsesToNodes(responses) {
		if found == nil {
			// a nil entry means the lookup returned no node for this ID
			continue
		}
		online = append(online, found)
	}
	return online, nil
}
// tallyAtRestStorage records the at-rest piece size of pointer against each
// of the given nodes.
//
// The piece size is the segment size divided (integer division) by the
// redundancy scheme's minimum required piece count. A pointer with no remote
// segment or redundancy scheme yields a count of 0 through the nil-safe
// getters, which is logged and ends the call instead of panicking.
//
// For each node, needToContact decides whether it is pinged first; a node
// whose ping fails or reports unavailable is skipped. Failures are logged
// with the node ID and underlying error and never abort the loop.
func (t *tally) tallyAtRestStorage(ctx context.Context, pointer *pb.Pointer, nodes []*pb.Node, client node.Client) {
	segmentSize := pointer.GetSize()
	minReq := pointer.GetRemote().GetRedundancy().GetMinReq()
	if minReq <= 0 {
		zap.L().Error("minReq must be an int greater than 0")
		return
	}
	pieceSize := segmentSize / int64(minReq)

	for _, n := range nodes {
		if n == nil {
			// Nothing to ping or update for a missing node entry.
			continue
		}

		if t.needToContact(n.Id) {
			nodeAvail, err := client.Ping(ctx, *n)
			if err != nil {
				zap.L().Error("ping failed", zap.String("nodeID", n.Id), zap.Error(err))
				continue
			}
			if !nodeAvail {
				continue
			}
		}

		if err := t.updateGranularTable(n.Id, pieceSize); err != nil {
			zap.L().Error("update failed", zap.String("nodeID", n.Id), zap.Error(err))
		}
	}
}
// needToContact reports whether the node identified by nodeID should be
// pinged before its storage is tallied. It is currently a stub that ignores
// nodeID and always returns true.
func (t *tally) needToContact(nodeID string) bool {
// TODO: not implemented yet.
// Intended behavior: check the db for whether the node was updated within the last time period.
return true
}
// updateGranularTable is meant to record pieceSize for the node identified by
// nodeID. It is currently a stub that ignores both arguments and returns nil.
func (t *tally) updateGranularTable(nodeID string, pieceSize int64) error {
// TODO: not implemented yet.
return nil
}

View File

@ -0,0 +1,74 @@
// Copyright (C) 2018 Storj Labs, Inc.
// See LICENSE for copying information.
package accounting
import (
"context"
"math/rand"
"strconv"
"testing"
"time"
"github.com/stretchr/testify/assert"
"go.uber.org/zap"
"storj.io/storj/pkg/dht"
"storj.io/storj/pkg/kademlia"
"storj.io/storj/pkg/node"
"storj.io/storj/pkg/overlay"
"storj.io/storj/pkg/overlay/mocks"
"storj.io/storj/pkg/pb"
"storj.io/storj/pkg/pointerdb"
"storj.io/storj/storage/teststore"
)
// ctx is the background context the tests pass to the code under test.
var ctx = context.Background()
// TestIdentifyActiveNodes is an empty placeholder; it asserts nothing yet.
func TestIdentifyActiveNodes(t *testing.T) {
// TODO: exercise identifyActiveNodes.
}
// TestOnlineNodes registers N nodes with a mock overlay and looks up one ID
// per node. A random subset of the lookup IDs is prefixed with "id" so that
// it differs from the registered node ID; the test expects onlineNodes to
// return exactly the nodes whose IDs were looked up unmodified.
func TestOnlineNodes(t *testing.T) {
	const N = 50

	nop := zap.NewNop()
	pdb := pointerdb.NewServer(teststore.New(), &overlay.Cache{}, nop, pointerdb.Config{}, nil)

	known := make([]*pb.Node, 0, N)
	lookupIDs := make([]dht.NodeID, 0, N)
	wantOnline := make([]*pb.Node, 0, N)
	for i := 0; i < N; i++ {
		name := strconv.Itoa(i)
		n := &pb.Node{Id: name, Address: &pb.NodeAddress{Address: name}}
		known = append(known, n)

		// Randomly decide whether this node is looked up under a mismatching ID.
		mismatched := i%(rand.Intn(5)+2) == 0
		if mismatched {
			lookupIDs = append(lookupIDs, node.IDFromString("id"+name))
			continue
		}
		lookupIDs = append(lookupIDs, node.IDFromString(name))
		wantOnline = append(wantOnline, n)
	}

	svc := newTally(pdb, mocks.NewOverlay(known), &kademlia.Kademlia{}, 0, nop, time.Second)

	online, err := svc.onlineNodes(ctx, lookupIDs)
	assert.NoError(t, err)
	assert.Equal(t, wantOnline, online)
}
// TestTallyAtRestStorage is an empty placeholder; it asserts nothing yet.
func TestTallyAtRestStorage(t *testing.T) {
// TODO: exercise tallyAtRestStorage.
}
// TestNeedToContact is an empty placeholder; it asserts nothing yet.
func TestNeedToContact(t *testing.T) {
// TODO: exercise needToContact.
}
// TestUpdateGranularTable is an empty placeholder; it asserts nothing yet.
func TestUpdateGranularTable(t *testing.T) {
// TODO: exercise updateGranularTable.
}

View File

@ -15,12 +15,12 @@ import (
"storj.io/storj/pkg/node" "storj.io/storj/pkg/node"
"storj.io/storj/pkg/pb" "storj.io/storj/pkg/pb"
"storj.io/storj/pkg/pointerdb" "storj.io/storj/pkg/pointerdb"
"storj.io/storj/pkg/utils"
"storj.io/storj/storage" "storj.io/storj/storage"
) )
// Checker is the interface for the data repair queue // Checker is the interface for data repair checker
type Checker interface { type Checker interface {
IdentifyInjuredSegments(ctx context.Context) (err error)
Run(ctx context.Context) error Run(ctx context.Context) error
} }
@ -51,7 +51,7 @@ func (c *checker) Run(ctx context.Context) (err error) {
defer mon.Task()(&ctx)(&err) defer mon.Task()(&ctx)(&err)
for { for {
err = c.IdentifyInjuredSegments(ctx) err = c.identifyInjuredSegments(ctx)
if err != nil { if err != nil {
zap.L().Error("Checker failed", zap.Error(err)) zap.L().Error("Checker failed", zap.Error(err))
} }
@ -64,8 +64,8 @@ func (c *checker) Run(ctx context.Context) (err error) {
} }
} }
// IdentifyInjuredSegments checks for missing pieces off of the pointerdb and overlay cache // identifyInjuredSegments checks for missing pieces off of the pointerdb and overlay cache
func (c *checker) IdentifyInjuredSegments(ctx context.Context) (err error) { func (c *checker) identifyInjuredSegments(ctx context.Context) (err error) {
defer mon.Task()(&ctx)(&err) defer mon.Task()(&ctx)(&err)
c.logger.Debug("entering pointerdb iterate") c.logger.Debug("entering pointerdb iterate")
@ -89,7 +89,7 @@ func (c *checker) IdentifyInjuredSegments(ctx context.Context) (err error) {
} }
missingPieces, err := c.offlineNodes(ctx, nodeIDs) missingPieces, err := c.offlineNodes(ctx, nodeIDs)
if err != nil { if err != nil {
return Error.New("error getting missing offline nodes %s", err) return Error.New("error getting offline nodes %s", err)
} }
numHealthy := len(nodeIDs) - len(missingPieces) numHealthy := len(nodeIDs) - len(missingPieces)
if int32(numHealthy) < pointer.Remote.Redundancy.RepairThreshold { if int32(numHealthy) < pointer.Remote.Redundancy.RepairThreshold {
@ -108,13 +108,13 @@ func (c *checker) IdentifyInjuredSegments(ctx context.Context) (err error) {
return err return err
} }
// returns the indices of offline and online nodes // returns the indices of offline nodes
func (c *checker) offlineNodes(ctx context.Context, nodeIDs []dht.NodeID) (offline []int32, err error) { func (c *checker) offlineNodes(ctx context.Context, nodeIDs []dht.NodeID) (offline []int32, err error) {
responses, err := c.overlay.BulkLookup(ctx, nodeIDsToLookupRequests(nodeIDs)) responses, err := c.overlay.BulkLookup(ctx, utils.NodeIDsToLookupRequests(nodeIDs))
if err != nil { if err != nil {
return []int32{}, err return []int32{}, err
} }
nodes := lookupResponsesToNodes(responses) nodes := utils.LookupResponsesToNodes(responses)
for i, n := range nodes { for i, n := range nodes {
if n == nil { if n == nil {
offline = append(offline, int32(i)) offline = append(offline, int32(i))
@ -122,21 +122,3 @@ func (c *checker) offlineNodes(ctx context.Context, nodeIDs []dht.NodeID) (offli
} }
return offline, nil return offline, nil
} }
func nodeIDsToLookupRequests(nodeIDs []dht.NodeID) *pb.LookupRequests {
var rq []*pb.LookupRequest
for _, v := range nodeIDs {
r := &pb.LookupRequest{NodeID: v.String()}
rq = append(rq, r)
}
return &pb.LookupRequests{Lookuprequest: rq}
}
func lookupResponsesToNodes(responses *pb.LookupResponses) []*pb.Node {
var nodes []*pb.Node
for _, v := range responses.Lookupresponse {
n := v.Node
nodes = append(nodes, n)
}
return nodes
}

View File

@ -88,7 +88,7 @@ func TestIdentifyInjuredSegments(t *testing.T) {
limit := 0 limit := 0
interval := time.Second interval := time.Second
checker := newChecker(pointerdb, repairQueue, overlayServer, limit, logger, interval) checker := newChecker(pointerdb, repairQueue, overlayServer, limit, logger, interval)
err := checker.IdentifyInjuredSegments(ctx) err := checker.identifyInjuredSegments(ctx)
assert.NoError(t, err) assert.NoError(t, err)
//check if the expected segments were added to the queue //check if the expected segments were added to the queue
@ -106,7 +106,7 @@ func TestIdentifyInjuredSegments(t *testing.T) {
} }
} }
func TestOfflineAndOnlineNodes(t *testing.T) { func TestOfflineNodes(t *testing.T) {
logger := zap.NewNop() logger := zap.NewNop()
pointerdb := pointerdb.NewServer(teststore.New(), &overlay.Cache{}, logger, pointerdb.Config{}, nil) pointerdb := pointerdb.NewServer(teststore.New(), &overlay.Cache{}, logger, pointerdb.Config{}, nil)
@ -202,7 +202,7 @@ func BenchmarkIdentifyInjuredSegments(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
interval := time.Second interval := time.Second
checker := newChecker(pointerdb, repairQueue, overlayServer, limit, logger, interval) checker := newChecker(pointerdb, repairQueue, overlayServer, limit, logger, interval)
err = checker.IdentifyInjuredSegments(ctx) err = checker.identifyInjuredSegments(ctx)
assert.NoError(b, err) assert.NoError(b, err)
//check if the expected segments were added to the queue //check if the expected segments were added to the queue

View File

@ -15,7 +15,7 @@ import (
"storj.io/storj/storage/redis" "storj.io/storj/storage/redis"
) )
// Config contains configurable values for repairer // Config contains configurable values for checker
type Config struct { type Config struct {
QueueAddress string `help:"data checker queue address" default:"redis://127.0.0.1:6378?db=1&password=abc123"` QueueAddress string `help:"data checker queue address" default:"redis://127.0.0.1:6378?db=1&password=abc123"`
Interval time.Duration `help:"how frequently checker should audit segments" default:"30s"` Interval time.Duration `help:"how frequently checker should audit segments" default:"30s"`
@ -39,10 +39,11 @@ func (c Config) Run(ctx context.Context, server *provider.Provider) (err error)
if err != nil { if err != nil {
return err return err
} }
ctx, cancel := context.WithCancel(ctx)
// TODO(coyle): we need to figure out how to propagate the error up to cancel the service
go func() { go func() {
if err := check.Run(ctx); err != nil { if err := check.Run(ctx); err != nil {
defer cancel()
zap.L().Error("Error running checker", zap.Error(err)) zap.L().Error("Error running checker", zap.Error(err))
} }
}() }()

View File

@ -45,9 +45,12 @@ func (c Config) Run(ctx context.Context, server *provider.Provider) (err error)
repairer := newRepairer(queue, ss, c.Interval, c.MaxRepair) repairer := newRepairer(queue, ss, c.Interval, c.MaxRepair)
ctx, cancel := context.WithCancel(ctx)
// TODO(coyle): we need to figure out how to propagate the error up to cancel the service // TODO(coyle): we need to figure out how to propagate the error up to cancel the service
go func() { go func() {
if err := repairer.Run(ctx); err != nil { if err := repairer.Run(ctx); err != nil {
defer cancel()
zap.L().Error("Error running repairer", zap.Error(err)) zap.L().Error("Error running repairer", zap.Error(err))
} }
}() }()

View File

@ -15,7 +15,7 @@ import (
segment "storj.io/storj/pkg/storage/segments" segment "storj.io/storj/pkg/storage/segments"
) )
// Repairer is the interface for the data repair queue // Repairer is the interface for the data repairer
type Repairer interface { type Repairer interface {
Repair(ctx context.Context, seg *pb.InjuredSegment) error Repair(ctx context.Context, seg *pb.InjuredSegment) error
Run(ctx context.Context) error Run(ctx context.Context) error

View File

@ -63,5 +63,6 @@ func (s *Server) Query(ctx context.Context, req *pb.QueryRequest) (*pb.QueryResp
// Ping provides an easy way to verify a node is online and accepting requests // Ping provides an easy way to verify a node is online and accepting requests
func (s *Server) Ping(ctx context.Context, req *pb.PingRequest) (*pb.PingResponse, error) { func (s *Server) Ping(ctx context.Context, req *pb.PingRequest) (*pb.PingResponse, error) {
//TODO
return &pb.PingResponse{}, nil return &pb.PingResponse{}, nil
} }

View File

@ -9,6 +9,9 @@ import (
"net/url" "net/url"
"strings" "strings"
"time" "time"
"storj.io/storj/pkg/dht"
"storj.io/storj/pkg/pb"
) )
// GetBytes transforms an empty interface type into a byte slice // GetBytes transforms an empty interface type into a byte slice
@ -106,3 +109,23 @@ func discardNil(ch chan error) chan error {
}() }()
return r return r
} }
// NodeIDsToLookupRequests builds one pb.LookupRequest per node ID, using the
// ID's String() form as the request's NodeID, and returns them wrapped in a
// pb.LookupRequests in input order.
//
// For empty input the returned message carries a nil request list.
func NodeIDsToLookupRequests(nodeIDs []dht.NodeID) *pb.LookupRequests {
	var requests []*pb.LookupRequest
	if n := len(nodeIDs); n > 0 {
		// The final length is known, so allocate the backing array once.
		requests = make([]*pb.LookupRequest, 0, n)
	}
	for _, id := range nodeIDs {
		requests = append(requests, &pb.LookupRequest{NodeID: id.String()})
	}
	return &pb.LookupRequests{Lookuprequest: requests}
}
// LookupResponsesToNodes extracts the Node of every lookup response, in
// response order. The result has one entry per response; an entry is nil
// when the corresponding response carries no node.
//
// A nil responses value, or one without any responses, yields a nil slice.
// The generated getters are used so that nil messages do not panic.
func LookupResponsesToNodes(responses *pb.LookupResponses) []*pb.Node {
	lookups := responses.GetLookupresponse()
	if len(lookups) == 0 {
		return nil
	}

	nodes := make([]*pb.Node, len(lookups))
	for i, r := range lookups {
		nodes[i] = r.GetNode()
	}
	return nodes
}