Checker service refactor (v3-1871) (#2082)

* refactor the checker service

* monkit update
aligeti 2019-05-31 10:12:49 -04:00 committed by GitHub
parent 6db9388082
commit 4ad5120923


@@ -10,6 +10,7 @@ import (
 	"github.com/gogo/protobuf/proto"
 	"github.com/zeebo/errs"
 	"go.uber.org/zap"
+	"golang.org/x/sync/errgroup"
 	monkit "gopkg.in/spacemonkeygo/monkit.v2"

 	"storj.io/storj/internal/sync2"
@@ -53,6 +54,7 @@ type Checker struct {
 	logger          *zap.Logger
 	Loop            sync2.Cycle
 	IrreparableLoop sync2.Cycle
+	monStats        durabilityStats
 }

 // NewChecker creates a new instance of checker
@@ -67,6 +69,7 @@ func NewChecker(metainfo *metainfo.Service, repairQueue queue.RepairQueue, overl
 		logger:          logger,
 		Loop:            *sync2.NewCycle(repairInterval),
 		IrreparableLoop: *sync2.NewCycle(irreparableInterval),
+		monStats:        durabilityStats{},
 	}
 	return checker
 }
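
For context, a minimal sketch of what the durabilityStats type now stored on the Checker might look like. It is inferred purely from the fields this diff touches; the actual definition lives elsewhere in the package, and the element type of remoteSegmentInfo is an assumption.

package checker

// Hypothetical reconstruction, not the repository's actual definition:
// durabilityStats accumulates counters across one full metainfo iteration.
type durabilityStats struct {
	remoteFilesChecked          int64
	remoteSegmentsChecked       int64
	remoteSegmentsNeedingRepair int64
	remoteSegmentsLost          int64
	remoteSegmentInfo           []string // per-file detail for lost segments; only its length is reported
}
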
@@ -75,34 +78,17 @@ func NewChecker(metainfo *metainfo.Service, repairQueue queue.RepairQueue, overl
 func (checker *Checker) Run(ctx context.Context) (err error) {
 	defer mon.Task()(&ctx)(&err)

-	c := make(chan error)
+	group, ctx := errgroup.WithContext(ctx)

-	go func() {
-		c <- checker.Loop.Run(ctx, func(ctx context.Context) error {
-			err := checker.IdentifyInjuredSegments(ctx)
-			if err != nil {
-				checker.logger.Error("error with injured segments identification: ", zap.Error(err))
-			}
-			return nil
-		})
-	}()
+	group.Go(func() error {
+		return checker.Loop.Run(ctx, checker.IdentifyInjuredSegments)
+	})

-	go func() {
-		c <- checker.IrreparableLoop.Run(ctx, func(ctx context.Context) error {
-			err := checker.IrreparableProcess(ctx)
-			if err != nil {
-				checker.logger.Error("error with irreparable segments identification", zap.Error(err))
-			}
-			return nil
-		})
-	}()
+	group.Go(func() error {
+		return checker.IrreparableLoop.Run(ctx, checker.IrreparableProcess)
+	})

-	for err := range c {
-		if err != nil {
-			return err
-		}
-	}
-	return nil
+	return group.Wait()
 }

 // Close halts the Checker loop
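
The Run refactor above replaces the hand-rolled goroutines and error channel with golang.org/x/sync/errgroup: both cycles run in one group, the first failure cancels the shared context, and group.Wait() returns that first error. A minimal standalone sketch of the pattern; the loop functions here are hypothetical stand-ins, not the checker's real methods.

package main

import (
	"context"
	"fmt"

	"golang.org/x/sync/errgroup"
)

// loopA blocks until the shared group context is canceled, standing in for a healthy cycle.
func loopA(ctx context.Context) error {
	<-ctx.Done()
	return ctx.Err()
}

// loopB fails immediately, standing in for a cycle whose callback returns an error.
func loopB(ctx context.Context) error {
	return fmt.Errorf("loop B failed")
}

func run(ctx context.Context) error {
	group, ctx := errgroup.WithContext(ctx)
	group.Go(func() error { return loopA(ctx) }) // canceled once any other task in the group fails
	group.Go(func() error { return loopB(ctx) }) // its error cancels ctx, unblocking loopA
	return group.Wait()                          // returns the first non-nil error
}

func main() {
	fmt.Println(run(context.Background())) // prints "loop B failed"
}

Note also a behavioral difference: the removed closures logged errors from IdentifyInjuredSegments and IrreparableProcess and returned nil, so the cycles kept running; with the methods passed to Loop.Run directly, a returned error now propagates out of the cycle (assuming sync2.Cycle stops on a non-nil callback error) and terminates both loops via group.Wait().
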
@@ -115,8 +101,6 @@ func (checker *Checker) Close() error {
 func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error) {
 	defer mon.Task()(&ctx)(&err)

-	var monStats durabilityStats
-
 	err = checker.metainfo.Iterate("", checker.lastChecked, true, false,
 		func(it storage.Iterator) error {
 			var item storage.ListItem
@@ -129,11 +113,14 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
 				// if we have finished iterating, send and reset durability stats
 				if checker.lastChecked == "" {
 					// send durability stats
-					mon.IntVal("remote_files_checked").Observe(monStats.remoteFilesChecked)
-					mon.IntVal("remote_segments_checked").Observe(monStats.remoteSegmentsChecked)
-					mon.IntVal("remote_segments_needing_repair").Observe(monStats.remoteSegmentsNeedingRepair)
-					mon.IntVal("remote_segments_lost").Observe(monStats.remoteSegmentsLost)
-					mon.IntVal("remote_files_lost").Observe(int64(len(monStats.remoteSegmentInfo)))
+					mon.IntVal("remote_files_checked").Observe(checker.monStats.remoteFilesChecked)
+					mon.IntVal("remote_segments_checked").Observe(checker.monStats.remoteSegmentsChecked)
+					mon.IntVal("remote_segments_needing_repair").Observe(checker.monStats.remoteSegmentsNeedingRepair)
+					mon.IntVal("remote_segments_lost").Observe(checker.monStats.remoteSegmentsLost)
+					mon.IntVal("remote_files_lost").Observe(int64(len(checker.monStats.remoteSegmentInfo)))
+
+					// reset durability stats for next iteration
+					checker.monStats = durabilityStats{}
 				}
 			}()
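
The stats block above uses monkit's package scope: IntVal(name) looks up (or creates) a named int64 distribution and Observe records one sample into it. Moving monStats onto the Checker lets the counts survive across callback invocations until a full iteration completes, at which point they are observed and reset. A small sketch of the pattern, with a hypothetical helper and the conventional package-level scope declaration (assumed to exist elsewhere in the real checker package):

package checker

import monkit "gopkg.in/spacemonkeygo/monkit.v2"

// mon is the package-level metrics scope; monkit conventionally declares one per package.
var mon = monkit.Package()

// reportDurability is a hypothetical helper showing the observation pattern used above.
func reportDurability(segmentsChecked, segmentsLost int64) {
	mon.IntVal("remote_segments_checked").Observe(segmentsChecked)
	mon.IntVal("remote_segments_lost").Observe(segmentsLost)
}
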
@@ -145,7 +132,7 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
 					return Error.New("error unmarshalling pointer %s", err)
 				}

-				err = checker.updateSegmentStatus(ctx, pointer, item.Key.String(), &monStats)
+				err = checker.updateSegmentStatus(ctx, pointer, item.Key.String(), &checker.monStats)
 				if err != nil {
 					return err
 				}
@@ -253,7 +240,6 @@ func (checker *Checker) IrreparableProcess(ctx context.Context) (err error) {
 	limit := 1
 	var offset int64
-	var monStats durabilityStats

 	for {
 		seg, err := checker.irrdb.GetLimited(ctx, limit, offset)
@@ -266,18 +252,12 @@ func (checker *Checker) IrreparableProcess(ctx context.Context) (err error) {
 			break
 		}

-		err = checker.updateSegmentStatus(ctx, seg[0].GetSegmentDetail(), string(seg[0].GetPath()), &monStats)
+		err = checker.updateSegmentStatus(ctx, seg[0].GetSegmentDetail(), string(seg[0].GetPath()), &durabilityStats{})
 		if err != nil {
 			checker.logger.Error("irrepair segment checker failed: ", zap.Error(err))
 		}

 		offset++
 	}

-	// send durability stats
-	mon.IntVal("remote_files_checked").Observe(monStats.remoteFilesChecked)
-	mon.IntVal("remote_segments_checked").Observe(monStats.remoteSegmentsChecked)
-	mon.IntVal("remote_segments_needing_repair").Observe(monStats.remoteSegmentsNeedingRepair)
-	mon.IntVal("remote_segments_lost").Observe(monStats.remoteSegmentsLost)
-	mon.IntVal("remote_files_lost").Observe(int64(len(monStats.remoteSegmentInfo)))
-
 	return nil
 }