diff --git a/cmd/metabase-verify/verify/verify.go b/cmd/metabase-verify/verify/verify.go index 017bfde50..13f4a1395 100644 --- a/cmd/metabase-verify/verify/verify.go +++ b/cmd/metabase-verify/verify/verify.go @@ -42,7 +42,7 @@ func New(log *zap.Logger, mdb segmentloop.MetabaseDB, config Config) *Chore { // RunOnce creates a new segmentloop and runs the verifications. func (chore *Chore) RunOnce(ctx context.Context) error { - loop := segmentloop.New(chore.Config.Loop, chore.DB) + loop := segmentloop.New(chore.Log, chore.Config.Loop, chore.DB) var group errs2.Group group.Go(func() error { diff --git a/monkit.lock b/monkit.lock index 36a4f6def..541d93472 100644 --- a/monkit.lock +++ b/monkit.lock @@ -55,6 +55,7 @@ storj.io/storj/satellite/gracefulexit."graceful_exit_success" Meter storj.io/storj/satellite/gracefulexit."graceful_exit_successful_pieces_transfer_ratio" IntVal storj.io/storj/satellite/gracefulexit."graceful_exit_transfer_piece_fail" Meter storj.io/storj/satellite/gracefulexit."graceful_exit_transfer_piece_success" Meter +storj.io/storj/satellite/metabase/segmentloop."segmentloop_error" Event storj.io/storj/satellite/metabase/segmentloop."segmentsProcessed" IntVal storj.io/storj/satellite/metabase/segmentloop.*Service.RunOnce Task storj.io/storj/satellite/metainfo."metainfo_rate_limit_exceeded" Event diff --git a/satellite/core.go b/satellite/core.go index 865b2190e..03b30d779 100644 --- a/satellite/core.go +++ b/satellite/core.go @@ -252,6 +252,7 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, peer.Metainfo.Metabase, ) peer.Metainfo.SegmentLoop = segmentloop.New( + peer.Log.Named("metainfo:segmentloop"), config.Metainfo.SegmentLoop, peer.Metainfo.Metabase, ) diff --git a/satellite/gc.go b/satellite/gc.go index f19abb91b..1b7e0d1c9 100644 --- a/satellite/gc.go +++ b/satellite/gc.go @@ -133,6 +133,7 @@ func NewGarbageCollection(log *zap.Logger, full *identity.FullIdentity, db DB, // As long as garbage collection is the only observer joining the loop, then by default // the loop will only run when the garbage collection joins (which happens every GarbageCollection.Interval) peer.Metainfo.SegmentLoop = segmentloop.New( + log.Named("segmentloop"), config.Metainfo.SegmentLoop, metabaseDB, ) diff --git a/satellite/metabase/segmentloop/service.go b/satellite/metabase/segmentloop/service.go index 4cd990a99..2eb7d4f2d 100644 --- a/satellite/metabase/segmentloop/service.go +++ b/satellite/metabase/segmentloop/service.go @@ -11,8 +11,10 @@ import ( "github.com/spacemonkeygo/monkit/v3" "github.com/zeebo/errs" + "go.uber.org/zap" "golang.org/x/time/rate" + "storj.io/common/errs2" "storj.io/storj/satellite/metabase" ) @@ -160,6 +162,7 @@ type MetabaseDB interface { // // architecture: Service type Service struct { + log *zap.Logger config Config metabaseDB MetabaseDB join chan *observerContext @@ -167,8 +170,9 @@ type Service struct { } // New creates a new segments loop service. -func New(config Config, metabaseDB MetabaseDB) *Service { +func New(log *zap.Logger, config Config, metabaseDB MetabaseDB) *Service { return &Service{ + log: log, metabaseDB: metabaseDB, config: config, join: make(chan *observerContext), @@ -223,7 +227,13 @@ func (loop *Service) Run(ctx context.Context) (err error) { for { err := loop.RunOnce(ctx) if err != nil { - return err + loop.log.Error("segment loop failure", zap.Error(err)) + + if errs2.IsCanceled(err) { + return err + } + + mon.Event("segmentloop_error") //mon:locked } } } diff --git a/satellite/metabase/segmentloop/service_test.go b/satellite/metabase/segmentloop/service_test.go index 259316bcb..7d1c7fab8 100644 --- a/satellite/metabase/segmentloop/service_test.go +++ b/satellite/metabase/segmentloop/service_test.go @@ -15,6 +15,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/zap" + "go.uber.org/zap/zaptest" "golang.org/x/sync/errgroup" "storj.io/common/errs2" @@ -250,7 +251,7 @@ func TestSegmentsLoopCancel(t *testing.T) { require.NoError(t, err) } - loop := segmentloop.New(segmentloop.Config{ + loop := segmentloop.New(zaptest.NewLogger(t), segmentloop.Config{ CoalesceDuration: 1 * time.Second, ListLimit: 10000, }, satellite.Metainfo.Metabase) @@ -322,7 +323,7 @@ func TestSegmentsLoop_MonitorCancel(t *testing.T) { }, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) { satellite := planet.Satellites[0] - loop := segmentloop.New(segmentloop.Config{ + loop := segmentloop.New(zaptest.NewLogger(t), segmentloop.Config{ CoalesceDuration: time.Nanosecond, ListLimit: 10000, }, satellite.Metainfo.Metabase)