Checker service refactor (v3-1871) (#2082)
* refactor the checker service * monkit update
This commit is contained in:
parent
6db9388082
commit
4ad5120923
@ -10,6 +10,7 @@ import (
|
|||||||
"github.com/gogo/protobuf/proto"
|
"github.com/gogo/protobuf/proto"
|
||||||
"github.com/zeebo/errs"
|
"github.com/zeebo/errs"
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
|
"golang.org/x/sync/errgroup"
|
||||||
monkit "gopkg.in/spacemonkeygo/monkit.v2"
|
monkit "gopkg.in/spacemonkeygo/monkit.v2"
|
||||||
|
|
||||||
"storj.io/storj/internal/sync2"
|
"storj.io/storj/internal/sync2"
|
||||||
@ -53,6 +54,7 @@ type Checker struct {
|
|||||||
logger *zap.Logger
|
logger *zap.Logger
|
||||||
Loop sync2.Cycle
|
Loop sync2.Cycle
|
||||||
IrreparableLoop sync2.Cycle
|
IrreparableLoop sync2.Cycle
|
||||||
|
monStats durabilityStats
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewChecker creates a new instance of checker
|
// NewChecker creates a new instance of checker
|
||||||
@ -67,6 +69,7 @@ func NewChecker(metainfo *metainfo.Service, repairQueue queue.RepairQueue, overl
|
|||||||
logger: logger,
|
logger: logger,
|
||||||
Loop: *sync2.NewCycle(repairInterval),
|
Loop: *sync2.NewCycle(repairInterval),
|
||||||
IrreparableLoop: *sync2.NewCycle(irreparableInterval),
|
IrreparableLoop: *sync2.NewCycle(irreparableInterval),
|
||||||
|
monStats: durabilityStats{},
|
||||||
}
|
}
|
||||||
return checker
|
return checker
|
||||||
}
|
}
|
||||||
@ -75,34 +78,17 @@ func NewChecker(metainfo *metainfo.Service, repairQueue queue.RepairQueue, overl
|
|||||||
func (checker *Checker) Run(ctx context.Context) (err error) {
|
func (checker *Checker) Run(ctx context.Context) (err error) {
|
||||||
defer mon.Task()(&ctx)(&err)
|
defer mon.Task()(&ctx)(&err)
|
||||||
|
|
||||||
c := make(chan error)
|
group, ctx := errgroup.WithContext(ctx)
|
||||||
|
|
||||||
go func() {
|
group.Go(func() error {
|
||||||
c <- checker.Loop.Run(ctx, func(ctx context.Context) error {
|
return checker.Loop.Run(ctx, checker.IdentifyInjuredSegments)
|
||||||
err := checker.IdentifyInjuredSegments(ctx)
|
})
|
||||||
if err != nil {
|
|
||||||
checker.logger.Error("error with injured segments identification: ", zap.Error(err))
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
})
|
|
||||||
}()
|
|
||||||
|
|
||||||
go func() {
|
group.Go(func() error {
|
||||||
c <- checker.IrreparableLoop.Run(ctx, func(ctx context.Context) error {
|
return checker.IrreparableLoop.Run(ctx, checker.IrreparableProcess)
|
||||||
err := checker.IrreparableProcess(ctx)
|
})
|
||||||
if err != nil {
|
|
||||||
checker.logger.Error("error with irreparable segments identification", zap.Error(err))
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
})
|
|
||||||
}()
|
|
||||||
|
|
||||||
for err := range c {
|
return group.Wait()
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close halts the Checker loop
|
// Close halts the Checker loop
|
||||||
@ -115,8 +101,6 @@ func (checker *Checker) Close() error {
|
|||||||
func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error) {
|
func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error) {
|
||||||
defer mon.Task()(&ctx)(&err)
|
defer mon.Task()(&ctx)(&err)
|
||||||
|
|
||||||
var monStats durabilityStats
|
|
||||||
|
|
||||||
err = checker.metainfo.Iterate("", checker.lastChecked, true, false,
|
err = checker.metainfo.Iterate("", checker.lastChecked, true, false,
|
||||||
func(it storage.Iterator) error {
|
func(it storage.Iterator) error {
|
||||||
var item storage.ListItem
|
var item storage.ListItem
|
||||||
@ -129,11 +113,14 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
|
|||||||
// if we have finished iterating, send and reset durability stats
|
// if we have finished iterating, send and reset durability stats
|
||||||
if checker.lastChecked == "" {
|
if checker.lastChecked == "" {
|
||||||
// send durability stats
|
// send durability stats
|
||||||
mon.IntVal("remote_files_checked").Observe(monStats.remoteFilesChecked)
|
mon.IntVal("remote_files_checked").Observe(checker.monStats.remoteFilesChecked)
|
||||||
mon.IntVal("remote_segments_checked").Observe(monStats.remoteSegmentsChecked)
|
mon.IntVal("remote_segments_checked").Observe(checker.monStats.remoteSegmentsChecked)
|
||||||
mon.IntVal("remote_segments_needing_repair").Observe(monStats.remoteSegmentsNeedingRepair)
|
mon.IntVal("remote_segments_needing_repair").Observe(checker.monStats.remoteSegmentsNeedingRepair)
|
||||||
mon.IntVal("remote_segments_lost").Observe(monStats.remoteSegmentsLost)
|
mon.IntVal("remote_segments_lost").Observe(checker.monStats.remoteSegmentsLost)
|
||||||
mon.IntVal("remote_files_lost").Observe(int64(len(monStats.remoteSegmentInfo)))
|
mon.IntVal("remote_files_lost").Observe(int64(len(checker.monStats.remoteSegmentInfo)))
|
||||||
|
|
||||||
|
// reset durability stats for next iteration
|
||||||
|
checker.monStats = durabilityStats{}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
@ -145,7 +132,7 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
|
|||||||
return Error.New("error unmarshalling pointer %s", err)
|
return Error.New("error unmarshalling pointer %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = checker.updateSegmentStatus(ctx, pointer, item.Key.String(), &monStats)
|
err = checker.updateSegmentStatus(ctx, pointer, item.Key.String(), &checker.monStats)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -253,7 +240,6 @@ func (checker *Checker) IrreparableProcess(ctx context.Context) (err error) {
|
|||||||
|
|
||||||
limit := 1
|
limit := 1
|
||||||
var offset int64
|
var offset int64
|
||||||
var monStats durabilityStats
|
|
||||||
|
|
||||||
for {
|
for {
|
||||||
seg, err := checker.irrdb.GetLimited(ctx, limit, offset)
|
seg, err := checker.irrdb.GetLimited(ctx, limit, offset)
|
||||||
@ -266,18 +252,12 @@ func (checker *Checker) IrreparableProcess(ctx context.Context) (err error) {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
err = checker.updateSegmentStatus(ctx, seg[0].GetSegmentDetail(), string(seg[0].GetPath()), &monStats)
|
err = checker.updateSegmentStatus(ctx, seg[0].GetSegmentDetail(), string(seg[0].GetPath()), &durabilityStats{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
checker.logger.Error("irrepair segment checker failed: ", zap.Error(err))
|
checker.logger.Error("irrepair segment checker failed: ", zap.Error(err))
|
||||||
}
|
}
|
||||||
offset++
|
offset++
|
||||||
}
|
}
|
||||||
// send durability stats
|
|
||||||
mon.IntVal("remote_files_checked").Observe(monStats.remoteFilesChecked)
|
|
||||||
mon.IntVal("remote_segments_checked").Observe(monStats.remoteSegmentsChecked)
|
|
||||||
mon.IntVal("remote_segments_needing_repair").Observe(monStats.remoteSegmentsNeedingRepair)
|
|
||||||
mon.IntVal("remote_segments_lost").Observe(monStats.remoteSegmentsLost)
|
|
||||||
mon.IntVal("remote_files_lost").Observe(int64(len(monStats.remoteSegmentInfo)))
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user