satellite/metabase/metaloop: add Monitor

We need some chores to join without triggering the loop.

For example it's fine to run metrics, only when something else is
running.

Change-Id: I9d8bd16f59c28c540c8d72971bc4e233a8660c02
This commit is contained in:
Egon Elbre 2021-04-10 12:31:48 +03:00
parent fff21b330d
commit c1fbecb96b
3 changed files with 161 additions and 46 deletions

View File

@ -47,24 +47,22 @@ func (chore *Chore) RunOnce(ctx context.Context) error {
loop := metaloop.New(chore.Config.Loop, chore.DB)
var group errs2.Group
group.Go(func() error {
plainOffset := &SegmentSizes{
Log: chore.Log.Named("segment-sizes"),
}
err := loop.Join(ctx, plainOffset)
return Error.Wrap(err)
})
group.Go(func() error {
progress := &ProgressObserver{
Log: chore.Log.Named("progress"),
ProgressPrintFrequency: chore.Config.ProgressPrintFrequency,
}
plainOffset := &SegmentSizes{
Log: chore.Log.Named("segment-sizes"),
}
err := loop.Join(ctx, progress, plainOffset)
if err != nil {
return Error.Wrap(err)
}
err := loop.Monitor(ctx, progress)
progress.Report()
return nil
return Error.Wrap(err)
})
group.Go(func() error {
return Error.Wrap(loop.RunOnce(ctx))

View File

@ -89,6 +89,7 @@ func (NullObserver) LoopStarted(context.Context, LoopInfo) error {
}
type observerContext struct {
trigger bool
observer Observer
ctx context.Context
@ -178,7 +179,7 @@ type MetabaseDB interface {
type Service struct {
config Config
metabaseDB MetabaseDB
join chan []*observerContext
join chan *observerContext
done chan struct{}
}
@ -187,37 +188,48 @@ func New(config Config, metabaseDB MetabaseDB) *Service {
return &Service{
metabaseDB: metabaseDB,
config: config,
join: make(chan []*observerContext),
join: make(chan *observerContext),
done: make(chan struct{}),
}
}
// Join will join the looper for one full cycle until completion and then returns.
// Joining will trigger a new iteration after coalesce duration.
// On ctx cancel the observer will return without completely finishing.
// Only on full complete iteration it will return nil.
// Safe to be called concurrently.
func (loop *Service) Join(ctx context.Context, observers ...Observer) (err error) {
func (loop *Service) Join(ctx context.Context, observer Observer) (err error) {
return loop.joinObserver(ctx, true, observer)
}
// Monitor will join the looper for one full cycle until completion and then returns.
// Joining with monitoring won't trigger after coalesce duration.
// On ctx cancel the observer will return without completely finishing.
// Only on full complete iteration it will return nil.
// Safe to be called concurrently.
func (loop *Service) Monitor(ctx context.Context, observer Observer) (err error) {
return loop.joinObserver(ctx, false, observer)
}
// joinObserver will join the looper for one full cycle until completion and then returns.
// On ctx cancel the observer will return without completely finishing.
// Only on full complete iteration it will return nil.
// Safe to be called concurrently.
func (loop *Service) joinObserver(ctx context.Context, trigger bool, obs Observer) (err error) {
defer mon.Task()(&ctx)(&err)
obsContexts := make([]*observerContext, len(observers))
for i, obs := range observers {
obsContexts[i] = newObserverContext(ctx, obs)
}
obsctx := newObserverContext(ctx, obs)
obsctx.trigger = trigger
select {
case loop.join <- obsContexts:
case loop.join <- obsctx:
case <-ctx.Done():
return ctx.Err()
case <-loop.done:
return ErrClosed
}
var errList errs.Group
for _, ctx := range obsContexts {
errList.Add(ctx.Wait())
}
return errList.Err()
return obsctx.Wait()
}
// Run starts the looping service.
@ -248,30 +260,80 @@ var monMetainfo = monkit.ScopeNamed("storj.io/storj/satellite/metainfo/metaloop"
func (loop *Service) RunOnce(ctx context.Context) (err error) {
defer monMetainfo.Task()(&ctx)(&err) //mon:locked
var observers []*observerContext
// wait for the first observer, or exit because context is canceled
select {
case list := <-loop.join:
observers = append(observers, list...)
case <-ctx.Done():
return ctx.Err()
coalesceTimer := time.NewTimer(loop.config.CoalesceDuration)
defer coalesceTimer.Stop()
if !coalesceTimer.Stop() {
<-coalesceTimer.C
}
// after the first observer is found, set timer for CoalesceDuration and add any observers that try to join before the timer is up
timer := time.NewTimer(loop.config.CoalesceDuration)
earlyExit := make(chan *observerContext)
earlyExitDone := make(chan struct{})
monitorEarlyExit := func(obs *observerContext) {
select {
case <-obs.ctx.Done():
select {
case <-earlyExitDone:
case earlyExit <- obs:
}
case <-earlyExitDone:
}
}
timerStarted := false
observers := []*observerContext{}
waitformore:
for {
select {
case list := <-loop.join:
observers = append(observers, list...)
case <-timer.C:
// when the coalesce timer hits, we have waited enough for observers to join.
case <-coalesceTimer.C:
break waitformore
// wait for a new observer to join.
case obsctx := <-loop.join:
// when the observer triggers the loop and it's the first one,
// then start the coalescing timer.
if obsctx.trigger {
if !timerStarted {
coalesceTimer.Reset(loop.config.CoalesceDuration)
timerStarted = true
}
}
observers = append(observers, obsctx)
go monitorEarlyExit(obsctx)
// remove an observer from waiting when it's canceled before the loop starts.
case obsctx := <-earlyExit:
for i, obs := range observers {
if obs == obsctx {
observers = append(observers[:i], observers[i+1:]...)
break
}
}
obsctx.HandleError(obsctx.ctx.Err())
// reevalute, whether we acually need to start the loop.
timerShouldRun := false
for _, obs := range observers {
timerShouldRun = timerShouldRun || obs.trigger
}
if !timerShouldRun && timerStarted {
if !coalesceTimer.Stop() {
<-coalesceTimer.C
}
}
// when ctx done happens we can finish all the waiting observers.
case <-ctx.Done():
finishObservers(observers)
close(earlyExitDone)
errorObservers(observers, ctx.Err())
return ctx.Err()
}
}
close(earlyExitDone)
return iterateDatabase(ctx, loop.metabaseDB, observers, loop.config.ListLimit, rate.NewLimiter(rate.Limit(loop.config.RateLimit), 1))
}
@ -284,9 +346,7 @@ func (loop *Service) Wait() {
func iterateDatabase(ctx context.Context, metabaseDB MetabaseDB, observers []*observerContext, limit int, rateLimiter *rate.Limiter) (err error) {
defer func() {
if err != nil {
for _, observer := range observers {
observer.HandleError(err)
}
errorObservers(observers, err)
return
}
finishObservers(observers)
@ -511,3 +571,9 @@ func finishObservers(observers []*observerContext) {
observer.Finish()
}
}
func errorObservers(observers []*observerContext, err error) {
for _, observer := range observers {
observer.HandleError(err)
}
}

View File

@ -234,7 +234,7 @@ func TestLoopObserverCancel(t *testing.T) {
// create 1 "good" observer
obs1 := newTestObserver(nil)
obs1x := newTestObserver(nil)
mon1 := newTestObserver(nil)
// create observer that will return an error from RemoteSegment
obs2 := newTestObserver(func(ctx context.Context) error {
@ -256,7 +256,10 @@ func TestLoopObserverCancel(t *testing.T) {
var group errgroup.Group
group.Go(func() error {
return metaLoop.Join(ctx, obs1, obs1x)
return metaLoop.Join(ctx, obs1)
})
group.Go(func() error {
return metaLoop.Monitor(ctx, mon1)
})
group.Go(func() error {
err := metaLoop.Join(ctx, obs2)
@ -281,7 +284,7 @@ func TestLoopObserverCancel(t *testing.T) {
// expect that obs1 saw all three segments, but obs2 and obs3 only saw the first one
assert.EqualValues(t, 3, obs1.remoteSegCount)
assert.EqualValues(t, 3, obs1x.remoteSegCount)
assert.EqualValues(t, 3, mon1.remoteSegCount)
assert.EqualValues(t, 1, obs2.remoteSegCount)
assert.EqualValues(t, 1, obs3.remoteSegCount)
})
@ -379,6 +382,54 @@ func TestLoopCancel(t *testing.T) {
})
}
func TestLoop_MonitorCancel(t *testing.T) {
testplanet.Run(t, testplanet.Config{
SatelliteCount: 1,
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
satellite := planet.Satellites[0]
metaLoop := metaloop.New(metaloop.Config{
CoalesceDuration: time.Nanosecond,
ListLimit: 10000,
}, satellite.Metainfo.Metabase)
obs1 := newTestObserver(func(ctx context.Context) error {
return errors.New("test error")
})
var group errgroup.Group
loopCtx, loopCancel := context.WithCancel(ctx)
group.Go(func() error {
err := metaLoop.Run(loopCtx)
t.Log("metaloop stopped")
if !errs2.IsCanceled(err) {
return errors.New("expected context canceled")
}
return nil
})
obsCtx, obsCancel := context.WithCancel(ctx)
group.Go(func() error {
defer loopCancel()
err := metaLoop.Monitor(obsCtx, obs1)
t.Log("observer stopped")
if !errs2.IsCanceled(err) {
return errors.New("expected context canceled")
}
return nil
})
obsCancel()
err := group.Wait()
require.NoError(t, err)
err = metaLoop.Close()
require.NoError(t, err)
})
}
type testObserver struct {
objectCount int
remoteSegCount int