satellite/metabase/segmentloop: don't shutdown satellite on loop error
We decided to avoid shutting down the satellite when the segment loop returns an error. The loop can still return an error, but it will be logged, and we will set up monitoring/alerting around that error. Change-Id: I6aa8e284406edf644a09d6b1fe00c3155c5430c9
This commit is contained in:
parent
615aae6bdd
commit
55d7bcc59b
@ -42,7 +42,7 @@ func New(log *zap.Logger, mdb segmentloop.MetabaseDB, config Config) *Chore {
|
||||
|
||||
// RunOnce creates a new segmentloop and runs the verifications.
|
||||
func (chore *Chore) RunOnce(ctx context.Context) error {
|
||||
loop := segmentloop.New(chore.Config.Loop, chore.DB)
|
||||
loop := segmentloop.New(chore.Log, chore.Config.Loop, chore.DB)
|
||||
|
||||
var group errs2.Group
|
||||
group.Go(func() error {
|
||||
|
@ -55,6 +55,7 @@ storj.io/storj/satellite/gracefulexit."graceful_exit_success" Meter
|
||||
storj.io/storj/satellite/gracefulexit."graceful_exit_successful_pieces_transfer_ratio" IntVal
|
||||
storj.io/storj/satellite/gracefulexit."graceful_exit_transfer_piece_fail" Meter
|
||||
storj.io/storj/satellite/gracefulexit."graceful_exit_transfer_piece_success" Meter
|
||||
storj.io/storj/satellite/metabase/segmentloop."segmentloop_error" Event
|
||||
storj.io/storj/satellite/metabase/segmentloop."segmentsProcessed" IntVal
|
||||
storj.io/storj/satellite/metabase/segmentloop.*Service.RunOnce Task
|
||||
storj.io/storj/satellite/metainfo."metainfo_rate_limit_exceeded" Event
|
||||
|
@ -252,6 +252,7 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB,
|
||||
peer.Metainfo.Metabase,
|
||||
)
|
||||
peer.Metainfo.SegmentLoop = segmentloop.New(
|
||||
peer.Log.Named("metainfo:segmentloop"),
|
||||
config.Metainfo.SegmentLoop,
|
||||
peer.Metainfo.Metabase,
|
||||
)
|
||||
|
@ -133,6 +133,7 @@ func NewGarbageCollection(log *zap.Logger, full *identity.FullIdentity, db DB,
|
||||
// As long as garbage collection is the only observer joining the loop, then by default
|
||||
// the loop will only run when the garbage collection joins (which happens every GarbageCollection.Interval)
|
||||
peer.Metainfo.SegmentLoop = segmentloop.New(
|
||||
log.Named("segmentloop"),
|
||||
config.Metainfo.SegmentLoop,
|
||||
metabaseDB,
|
||||
)
|
||||
|
@ -11,8 +11,10 @@ import (
|
||||
|
||||
"github.com/spacemonkeygo/monkit/v3"
|
||||
"github.com/zeebo/errs"
|
||||
"go.uber.org/zap"
|
||||
"golang.org/x/time/rate"
|
||||
|
||||
"storj.io/common/errs2"
|
||||
"storj.io/storj/satellite/metabase"
|
||||
)
|
||||
|
||||
@ -160,6 +162,7 @@ type MetabaseDB interface {
|
||||
//
|
||||
// architecture: Service
|
||||
type Service struct {
|
||||
log *zap.Logger
|
||||
config Config
|
||||
metabaseDB MetabaseDB
|
||||
join chan *observerContext
|
||||
@ -167,8 +170,9 @@ type Service struct {
|
||||
}
|
||||
|
||||
// New creates a new segments loop service.
|
||||
func New(config Config, metabaseDB MetabaseDB) *Service {
|
||||
func New(log *zap.Logger, config Config, metabaseDB MetabaseDB) *Service {
|
||||
return &Service{
|
||||
log: log,
|
||||
metabaseDB: metabaseDB,
|
||||
config: config,
|
||||
join: make(chan *observerContext),
|
||||
@ -223,7 +227,13 @@ func (loop *Service) Run(ctx context.Context) (err error) {
|
||||
for {
|
||||
err := loop.RunOnce(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
loop.log.Error("segment loop failure", zap.Error(err))
|
||||
|
||||
if errs2.IsCanceled(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
mon.Event("segmentloop_error") //mon:locked
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -15,6 +15,7 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.uber.org/zap"
|
||||
"go.uber.org/zap/zaptest"
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
"storj.io/common/errs2"
|
||||
@ -250,7 +251,7 @@ func TestSegmentsLoopCancel(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
loop := segmentloop.New(segmentloop.Config{
|
||||
loop := segmentloop.New(zaptest.NewLogger(t), segmentloop.Config{
|
||||
CoalesceDuration: 1 * time.Second,
|
||||
ListLimit: 10000,
|
||||
}, satellite.Metainfo.Metabase)
|
||||
@ -322,7 +323,7 @@ func TestSegmentsLoop_MonitorCancel(t *testing.T) {
|
||||
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
||||
satellite := planet.Satellites[0]
|
||||
|
||||
loop := segmentloop.New(segmentloop.Config{
|
||||
loop := segmentloop.New(zaptest.NewLogger(t), segmentloop.Config{
|
||||
CoalesceDuration: time.Nanosecond,
|
||||
ListLimit: 10000,
|
||||
}, satellite.Metainfo.Metabase)
|
||||
|
Loading…
Reference in New Issue
Block a user