Add metainfo loop service (#2563)

Add a metainfo loop service on the satellite that can be subscribed to by various services that need to make use of metainfo information
Maximillian von Briesen 2019-07-22 09:34:12 -04:00 committed by GitHub
parent 6143be5915
commit 6c1c3fb4a7
6 changed files with 570 additions and 0 deletions
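For a sense of how a service would subscribe, here is a rough sketch of an observer joining the loop. Everything in it (the segmentCounter type and the countRemoteSegments helper) is illustrative and not part of the diff below:

package example

import (
	"context"

	"storj.io/storj/pkg/pb"
	"storj.io/storj/pkg/storj"
	"storj.io/storj/satellite/metainfo"
)

// segmentCounter is a hypothetical observer that counts what the loop feeds it.
type segmentCounter struct {
	remoteSegments int
	remoteObjects  int
	inlineSegments int
}

func (c *segmentCounter) RemoteSegment(ctx context.Context, path storj.Path, pointer *pb.Pointer) error {
	c.remoteSegments++
	return nil
}

func (c *segmentCounter) RemoteObject(ctx context.Context, path storj.Path, pointer *pb.Pointer) error {
	c.remoteObjects++
	return nil
}

func (c *segmentCounter) InlineSegment(ctx context.Context, path storj.Path, pointer *pb.Pointer) error {
	c.inlineSegments++
	return nil
}

// countRemoteSegments joins the loop and blocks until one full iteration completes (or ctx is canceled).
func countRemoteSegments(ctx context.Context, loop *metainfo.Loop) (int, error) {
	counter := &segmentCounter{}
	if err := loop.Join(ctx, counter); err != nil {
		return 0, err
	}
	return counter.remoteSegments, nil
}

Since Join returns after a single pass over metainfo, a service that wants continuous updates would presumably call Join again for each new cycle.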


@ -144,6 +144,9 @@ func (planet *Planet) newSatellites(count int) ([]*satellite.Peer, error) {
MaxThreshold: (planet.config.StorageNodeCount * 4 / 5),
Validate: false,
},
Loop: metainfo.LoopConfig{
CoalesceDuration: 5 * time.Second,
},
},
Orders: orders.Config{
Expiration: 7 * 24 * time.Hour,


@ -38,6 +38,7 @@ type Config struct {
MaxInlineSegmentSize memory.Size `default:"8000" help:"maximum inline segment size"`
Overlay bool `default:"true" help:"toggle flag if overlay is enabled"`
RS RSConfig `help:"redundancy scheme configuration"`
Loop LoopConfig `help:"metainfo loop configuration"`
}
// NewStore returns database for storing pointer data

satellite/metainfo/loop.go Normal file

@ -0,0 +1,231 @@
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package metainfo
import (
"context"
"time"
"github.com/gogo/protobuf/proto"
"github.com/zeebo/errs"
"storj.io/storj/pkg/pb"
"storj.io/storj/pkg/storj"
"storj.io/storj/storage"
)
var (
// LoopError is a standard error class for this component.
LoopError = errs.Class("metainfo loop error")
// LoopClosedError is returned by Join when the loop has already shut down.
LoopClosedError = LoopError.New("loop closed")
)
// Observer is an interface defining an observer that can subscribe to the metainfo loop.
type Observer interface {
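// RemoteSegment is called for each remote segment found while iterating over metainfo.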
RemoteSegment(context.Context, storj.Path, *pb.Pointer) error
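// RemoteObject is called once per remote object, i.e. for the last segment of each remote object.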
RemoteObject(context.Context, storj.Path, *pb.Pointer) error
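// InlineSegment is called for each inline segment found while iterating over metainfo.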
InlineSegment(context.Context, storj.Path, *pb.Pointer) error
}
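// observerContext pairs an Observer with the context it joined with and a channel used to report completion or errors.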
type observerContext struct {
Observer
ctx context.Context
done chan error
}
func (observer *observerContext) HandleError(err error) bool {
if err != nil {
observer.done <- err
observer.Finish()
return true
}
return false
}
func (observer *observerContext) Finish() {
close(observer.done)
}
func (observer *observerContext) Wait() error {
return <-observer.done
}
// LoopConfig contains configurable values for the metainfo loop.
type LoopConfig struct {
CoalesceDuration time.Duration `help:"how long to wait for new observers before starting iteration" releaseDefault:"5s" devDefault:"5s"`
}
// Loop is a metainfo loop service.
type Loop struct {
config LoopConfig
metainfo *Service
join chan *observerContext
done chan struct{}
}
// NewLoop creates a new metainfo loop service.
func NewLoop(config LoopConfig, metainfo *Service) *Loop {
return &Loop{
metainfo: metainfo,
config: config,
join: make(chan *observerContext),
done: make(chan struct{}),
}
}
// Join will join the looper for one full cycle until completion and then returns.
// On ctx cancel the observer will return early without completely finishing.
// It returns nil only after a full, completed iteration.
// Safe to be called concurrently.
func (loop *Loop) Join(ctx context.Context, observer Observer) (err error) {
defer mon.Task()(&ctx)(&err)
obsContext := &observerContext{
Observer: observer,
ctx: ctx,
done: make(chan error),
}
select {
case loop.join <- obsContext:
case <-ctx.Done():
return ctx.Err()
case <-loop.done:
return LoopClosedError
}
return obsContext.Wait()
}
// Run starts the looping service.
// It can only be called once, otherwise a panic will occur.
func (loop *Loop) Run(ctx context.Context) (err error) {
defer mon.Task()(&ctx)(&err)
defer close(loop.done)
for {
err := loop.runOnce(ctx)
if err != nil {
return err
}
}
}
// runOnce goes through metainfo one time and sends information to observers.
func (loop *Loop) runOnce(ctx context.Context) (err error) {
defer mon.Task()(&ctx)(&err)
var observers []*observerContext
defer func() {
if err != nil {
for _, observer := range observers {
observer.HandleError(err)
}
return
}
for _, observer := range observers {
observer.Finish()
}
}()
// wait for the first observer, or exit because context is canceled
select {
case observer := <-loop.join:
observers = append(observers, observer)
case <-ctx.Done():
return ctx.Err()
}
// after the first observer is found, set timer for CoalesceDuration and add any observers that try to join before the timer is up
timer := time.NewTimer(loop.config.CoalesceDuration)
waitformore:
for {
select {
case observer := <-loop.join:
observers = append(observers, observer)
case <-timer.C:
break waitformore
case <-ctx.Done():
return ctx.Err()
}
}
err = loop.metainfo.Iterate(ctx, "", "", true, false,
func(ctx context.Context, it storage.Iterator) error {
var item storage.ListItem
// iterate over every segment in metainfo
for it.Next(ctx, &item) {
path := item.Key.String()
pointer := &pb.Pointer{}
err = proto.Unmarshal(item.Value, pointer)
if err != nil {
return LoopError.New("unexpected error unmarshalling pointer %s", err)
}
nextObservers := observers[:0]
for _, observer := range observers {
keepObserver := handlePointer(ctx, observer, path, pointer)
if keepObserver {
nextObservers = append(nextObservers, observer)
}
}
observers = nextObservers
if len(observers) == 0 {
return nil
}
// if context has been canceled exit. Otherwise, continue
select {
case <-ctx.Done():
return ctx.Err()
default:
}
}
return nil
})
return err
}
// handlePointer deals with a pointer for a single observer.
// If the observer returns an error, the error is handled and false is returned; otherwise true is returned.
func handlePointer(ctx context.Context, observer *observerContext, path storj.Path, pointer *pb.Pointer) bool {
pathElements := storj.SplitPath(path)
isLastSeg := len(pathElements) >= 2 && pathElements[1] == "l"
remote := pointer.GetRemote()
if remote != nil {
if observer.HandleError(observer.RemoteSegment(ctx, path, pointer)) {
return false
}
if isLastSeg {
if observer.HandleError(observer.RemoteObject(ctx, path, pointer)) {
return false
}
}
} else if observer.HandleError(observer.InlineSegment(ctx, path, pointer)) {
return false
}
select {
case <-observer.ctx.Done():
observer.HandleError(observer.ctx.Err())
return false
default:
}
return true
}
// Wait waits for run to be finished.
// Safe to be called concurrently.
func (loop *Loop) Wait() {
<-loop.done
}
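For orientation, a rough sketch of how Run, Join, and Wait are meant to fit together. The service and observer values and the runLoopExample function are placeholders, not code from this commit; the real wiring lives in the satellite peer changes below:

package example

import (
	"context"
	"time"

	"storj.io/storj/satellite/metainfo"
)

func runLoopExample(ctx context.Context, service *metainfo.Service, observer metainfo.Observer) error {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	loop := metainfo.NewLoop(metainfo.LoopConfig{CoalesceDuration: 5 * time.Second}, service)

	// Run iterates over metainfo repeatedly until its context is canceled.
	go func() { _ = loop.Run(ctx) }()

	// Join blocks until the loop finishes one full pass (or ctx is canceled).
	err := loop.Join(ctx, observer)

	// On shutdown, cancel the context and wait for Run to finish.
	cancel()
	loop.Wait()
	return err
}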


@ -0,0 +1,327 @@
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package metainfo_test
import (
"context"
"errors"
"strings"
"sync/atomic"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
"storj.io/storj/internal/errs2"
"storj.io/storj/internal/memory"
"storj.io/storj/internal/testcontext"
"storj.io/storj/internal/testplanet"
"storj.io/storj/internal/testrand"
"storj.io/storj/pkg/pb"
"storj.io/storj/pkg/storj"
"storj.io/storj/satellite"
"storj.io/storj/satellite/metainfo"
)
// TestMetainfoLoop does the following:
// * upload 5 remote files with 1 segment
// * (TODO) upload 3 remote files with 2 segments
// * upload 2 inline files
// * connect two observers to the metainfo loop
// * run the metainfo loop
// * expect that each observer has seen
// - 5 remote files
// - 5 remote segments
// - 2 inline files/segments
// - 7 unique path items
func TestMetainfoLoop(t *testing.T) {
// TODO: figure out how to configure testplanet so we can upload 2*segmentSize to get two segments
segmentSize := 8 * memory.KiB
testplanet.Run(t, testplanet.Config{
SatelliteCount: 1,
StorageNodeCount: 4,
UplinkCount: 1,
Reconfigure: testplanet.Reconfigure{
Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
config.Metainfo.Loop.CoalesceDuration = 1 * time.Second
},
},
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
ul := planet.Uplinks[0]
satellite := planet.Satellites[0]
metaLoop := satellite.Metainfo.Loop
// upload 5 remote files with 1 segment
for i := 0; i < 5; i++ {
testData := testrand.Bytes(segmentSize)
path := "/some/remote/path/" + string(i)
err := ul.Upload(ctx, satellite, "bucket", path, testData)
require.NoError(t, err)
}
// (TODO) upload 3 remote files with 2 segments
// for i := 0; i < 3; i++ {
// testData := testrand.Bytes(2 * segmentSize)
// path := "/some/other/remote/path/" + string(i)
// err := ul.Upload(ctx, satellite, "bucket", path, testData)
// require.NoError(t, err)
// }
// upload 2 inline files
for i := 0; i < 2; i++ {
testData := testrand.Bytes(segmentSize / 8)
path := "/some/inline/path/" + string(i)
err := ul.Upload(ctx, satellite, "bucket", path, testData)
require.NoError(t, err)
}
// create 2 observers
obs1 := newTestObserver(nil)
obs2 := newTestObserver(nil)
var group errgroup.Group
group.Go(func() error {
return metaLoop.Join(ctx, obs1)
})
group.Go(func() error {
return metaLoop.Join(ctx, obs2)
})
err := group.Wait()
require.NoError(t, err)
for _, obs := range []*testObserver{obs1, obs2} {
assert.EqualValues(t, 5, obs.remoteSegCount)
assert.EqualValues(t, 5, obs.remoteFileCount)
assert.EqualValues(t, 2, obs.inlineSegCount)
assert.EqualValues(t, 7, len(obs.uniquePaths))
}
})
}
// TestMetainfoLoopObserverCancel does the following:
// * upload 3 remote segments
// * hook three observers up to metainfo loop
// * let observer 1 run normally
// * let observer 2 return an error from one of its handlers
// * let observer 3's context be canceled
// * expect observer 1 to see all segments
// * expect observers 2 and 3 to finish with errors
func TestMetainfoLoopObserverCancel(t *testing.T) {
segmentSize := 8 * memory.KiB
testplanet.Run(t, testplanet.Config{
SatelliteCount: 1,
StorageNodeCount: 4,
UplinkCount: 1,
Reconfigure: testplanet.Reconfigure{
Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
config.Metainfo.Loop.CoalesceDuration = 1 * time.Second
},
},
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
ul := planet.Uplinks[0]
satellite := planet.Satellites[0]
metaLoop := satellite.Metainfo.Loop
// upload 3 remote files with 1 segment
for i := 0; i < 3; i++ {
testData := testrand.Bytes(segmentSize)
path := "/some/remote/path/" + string(i)
err := ul.Upload(ctx, satellite, "bucket", path, testData)
require.NoError(t, err)
}
// create 1 "good" observer
obs1 := newTestObserver(nil)
// create observer that will return an error from RemoteSegment
obs2 := newTestObserver(func(ctx context.Context) error {
return errors.New("test error")
})
// create observer that will cancel its own context from RemoteSegment
obs3Ctx, cancel := context.WithCancel(ctx)
var once int64
obs3 := newTestObserver(func(ctx context.Context) error {
if atomic.AddInt64(&once, 1) == 1 {
cancel()
<-obs3Ctx.Done() // ensure we wait for cancellation to propagate
} else {
panic("multiple calls to observer after loop cancel")
}
return nil
})
var group errgroup.Group
group.Go(func() error {
return metaLoop.Join(ctx, obs1)
})
group.Go(func() error {
err := metaLoop.Join(ctx, obs2)
if err == nil {
return errors.New("got no error")
}
if !strings.Contains(err.Error(), "test error") {
return errors.New("expected to find error")
}
return nil
})
group.Go(func() error {
err := metaLoop.Join(obs3Ctx, obs3)
if !errs2.IsCanceled(err) {
return errors.New("expected canceled")
}
return nil
})
err := group.Wait()
require.NoError(t, err)
// expect that obs1 saw all three segments, but obs2 and obs3 only saw the first one
assert.EqualValues(t, 3, obs1.remoteSegCount)
assert.EqualValues(t, 1, obs2.remoteSegCount)
assert.EqualValues(t, 1, obs3.remoteSegCount)
})
}
// TestMetainfoLoopCancel does the following:
// * upload 3 remote segments
// * hook two observers up to metainfo loop
// * cancel loop context partway through
// * expect both observers to exit with an error and see fewer than 3 remote segments
// * expect that a new observer attempting to join at this point receives a loop closed error
func TestMetainfoLoopCancel(t *testing.T) {
segmentSize := 8 * memory.KiB
testplanet.Run(t, testplanet.Config{
SatelliteCount: 1,
StorageNodeCount: 4,
UplinkCount: 1,
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
ul := planet.Uplinks[0]
satellite := planet.Satellites[0]
// upload 3 remote files with 1 segment
for i := 0; i < 3; i++ {
testData := testrand.Bytes(segmentSize)
path := "/some/remote/path/" + string(i)
err := ul.Upload(ctx, satellite, "bucket", path, testData)
require.NoError(t, err)
}
// create a new metainfo loop
metaLoop := metainfo.NewLoop(metainfo.LoopConfig{
CoalesceDuration: 1 * time.Second,
}, satellite.Metainfo.Service)
// create a cancelable context to pass into metaLoop.Run
loopCtx, cancel := context.WithCancel(ctx)
// create 1 normal observer
obs1 := newTestObserver(nil)
var once int64
// create another normal observer that will wait before returning during RemoteSegment so we can sync with context cancelation
obs2 := newTestObserver(func(ctx context.Context) error {
// cancel context during call to obs2.RemoteSegment inside loop
if atomic.AddInt64(&once, 1) == 1 {
cancel()
<-ctx.Done() // ensure we wait for cancellation to propagate
} else {
panic("multiple calls to observer after loop cancel")
}
return nil
})
var group errgroup.Group
// start loop with cancelable context
group.Go(func() error {
err := metaLoop.Run(loopCtx)
if !errs2.IsCanceled(err) {
return errors.New("expected context canceled")
}
return nil
})
group.Go(func() error {
err := metaLoop.Join(ctx, obs1)
if !errs2.IsCanceled(err) {
return errors.New("expected context canceled")
}
return nil
})
group.Go(func() error {
err := metaLoop.Join(ctx, obs2)
if !errs2.IsCanceled(err) {
return errors.New("expected context canceled")
}
return nil
})
err := group.Wait()
require.NoError(t, err)
obs3 := newTestObserver(nil)
err = metaLoop.Join(ctx, obs3)
require.Error(t, err)
assert.Contains(t, err.Error(), "loop closed")
// expect that obs1 and obs2 each saw fewer than three remote segments
assert.True(t, obs1.remoteSegCount < 3)
assert.True(t, obs2.remoteSegCount < 3)
})
}
type testObserver struct {
remoteSegCount int
remoteFileCount int
inlineSegCount int
uniquePaths map[string]struct{}
onSegment func(context.Context) error // if set, run this during RemoteSegment()
}
func newTestObserver(onSegment func(context.Context) error) *testObserver {
return &testObserver{
remoteSegCount: 0,
remoteFileCount: 0,
inlineSegCount: 0,
uniquePaths: make(map[string]struct{}),
onSegment: onSegment,
}
}
func (obs *testObserver) RemoteSegment(ctx context.Context, path storj.Path, pointer *pb.Pointer) error {
obs.remoteSegCount++
if _, ok := obs.uniquePaths[path]; ok {
// TODO: collect the errors and check in test
panic("Expected unique path in observer.RemoteSegment")
}
obs.uniquePaths[path] = struct{}{}
if obs.onSegment != nil {
return obs.onSegment(ctx)
}
return nil
}
func (obs *testObserver) RemoteObject(ctx context.Context, path storj.Path, pointer *pb.Pointer) error {
obs.remoteFileCount++
return nil
}
func (obs *testObserver) InlineSegment(ctx context.Context, path storj.Path, pointer *pb.Pointer) error {
obs.inlineSegCount++
if _, ok := obs.uniquePaths[path]; ok {
// TODO: collect the errors and check in test
panic("Expected unique path in observer.InlineSegment")
}
obs.uniquePaths[path] = struct{}{}
return nil
}


@ -167,6 +167,7 @@ type Peer struct {
Database storage.KeyValueStore // TODO: move into pointerDB
Service *metainfo.Service
Endpoint2 *metainfo.Endpoint
Loop *metainfo.Loop
}
Inspector struct {
@ -407,6 +408,7 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, config *Config, ve
peer.Metainfo.Database,
peer.DB.Buckets(),
)
peer.Metainfo.Loop = metainfo.NewLoop(config.Metainfo.Loop, peer.Metainfo.Service)
peer.Metainfo.Endpoint2 = metainfo.NewEndpoint(
peer.Log.Named("metainfo:endpoint"),
@ -651,6 +653,9 @@ func (peer *Peer) Run(ctx context.Context) (err error) {
group.Go(func() error {
return errs2.IgnoreCanceled(peer.Repair.Checker.Run(ctx))
})
group.Go(func() error {
return errs2.IgnoreCanceled(peer.Metainfo.Loop.Run(ctx))
})
group.Go(func() error {
return errs2.IgnoreCanceled(peer.Repair.Repairer.Run(ctx))
})


@ -172,6 +172,9 @@ kademlia.operator.wallet: ""
# the database connection string to use
# metainfo.database-url: "postgres://"
# how long to wait for new observers before starting iteration
# metainfo.loop.coalesce-duration: 5s
# maximum inline segment size
# metainfo.max-inline-segment-size: 8.0 KB