diff --git a/Jenkinsfile.public b/Jenkinsfile.public index 61f8c6564..a23cbb553 100644 --- a/Jenkinsfile.public +++ b/Jenkinsfile.public @@ -49,6 +49,7 @@ pipeline { sh 'golangci-lint -j=2 run' sh 'go run scripts/check-mod-tidy.go -mod .build/go.mod.orig' sh 'make check-satellite-config-lock' + sh 'make check-monitoring' } } diff --git a/Makefile b/Makefile index 14d57c380..eaa35d17d 100644 --- a/Makefile +++ b/Makefile @@ -132,6 +132,13 @@ test-sim-backwards-compatible: ## Test uploading a file with lastest release (je @echo "Running ${@}" @./scripts/test-sim-backwards.sh +.PHONY: check-monitoring +check-monitoring: ## Check for locked monkit calls that have changed + @echo "Running ${@}" + @go run ./scripts/check-monitoring.go | diff -U0 ./monkit.lock - \ + || (echo "Locked monkit metrics have been changed. Notify #data-science and run \`go generate ./scripts/check-monitoring.go\` to update monkit.lock file." \ + && exit 1) + ##@ Build .PHONY: storagenode-console diff --git a/monkit.lock b/monkit.lock new file mode 100644 index 000000000..a3dcca8ac --- /dev/null +++ b/monkit.lock @@ -0,0 +1,18 @@ +storj.io/storj/satellite/accounting/tally."total.bytes" IntVal +storj.io/storj/satellite/accounting/tally."total.inline_bytes" IntVal +storj.io/storj/satellite/accounting/tally."total.inline_segments" IntVal +storj.io/storj/satellite/accounting/tally."total.objects" IntVal +storj.io/storj/satellite/accounting/tally."total.remote_bytes" IntVal +storj.io/storj/satellite/accounting/tally."total.remote_segments" IntVal +storj.io/storj/satellite/accounting/tally."total.segments" IntVal +storj.io/storj/satellite/repair/checker."checker_segment_age" IntVal +storj.io/storj/satellite/repair/checker."checker_segment_healthy_count" IntVal +storj.io/storj/satellite/repair/checker."checker_segment_time_until_irreparable" IntVal +storj.io/storj/satellite/repair/checker."checker_segment_total_count" IntVal +storj.io/storj/satellite/repair/checker."remote_files_checked" IntVal +storj.io/storj/satellite/repair/checker."remote_files_lost" IntVal +storj.io/storj/satellite/repair/checker."remote_segments_checked" IntVal +storj.io/storj/satellite/repair/checker."remote_segments_lost" IntVal +storj.io/storj/satellite/repair/checker."remote_segments_needing_repair" IntVal +storj.io/storj/satellite/satellitedb."audit_reputation_alpha" FloatVal +storj.io/storj/satellite/satellitedb."audit_reputation_beta" FloatVal diff --git a/satellite/accounting/tally/tally.go b/satellite/accounting/tally/tally.go index ea9d153d8..3f0934158 100644 --- a/satellite/accounting/tally/tally.go +++ b/satellite/accounting/tally/tally.go @@ -9,7 +9,7 @@ import ( "github.com/zeebo/errs" "go.uber.org/zap" - monkit "gopkg.in/spacemonkeygo/monkit.v2" + "gopkg.in/spacemonkeygo/monkit.v2" "storj.io/storj/internal/sync2" "storj.io/storj/pkg/pb" @@ -132,10 +132,26 @@ func (service *Service) Tally(ctx context.Context) (err error) { if len(observer.Bucket) > 0 { var total accounting.BucketTally for _, bucket := range observer.Bucket { - bucketReport(bucket, "bucket") + monAccounting.IntVal("bucket.objects").Observe(bucket.ObjectCount) + + monAccounting.IntVal("bucket.segments").Observe(bucket.Segments()) + monAccounting.IntVal("bucket.inline_segments").Observe(bucket.InlineSegments) + monAccounting.IntVal("bucket.remote_segments").Observe(bucket.RemoteSegments) + + monAccounting.IntVal("bucket.bytes").Observe(bucket.Bytes()) + monAccounting.IntVal("bucket.inline_bytes").Observe(bucket.InlineBytes) + monAccounting.IntVal("bucket.remote_bytes").Observe(bucket.RemoteBytes) total.Combine(bucket) } - bucketReport(&total, "total") + monAccounting.IntVal("total.objects").Observe(total.ObjectCount) //locked + + monAccounting.IntVal("total.segments").Observe(total.Segments()) //locked + monAccounting.IntVal("total.inline_segments").Observe(total.InlineSegments) //locked + monAccounting.IntVal("total.remote_segments").Observe(total.RemoteSegments) //locked + + monAccounting.IntVal("total.bytes").Observe(total.Bytes()) //locked + monAccounting.IntVal("total.inline_bytes").Observe(total.InlineBytes) //locked + monAccounting.IntVal("total.remote_bytes").Observe(total.RemoteBytes) //locked } // return errors if something went wrong. @@ -219,16 +235,3 @@ func (observer *Observer) RemoteSegment(ctx context.Context, path metainfo.Scope // using custom name to avoid breaking monitoring var monAccounting = monkit.ScopeNamed("storj.io/storj/satellite/accounting") - -// bucketReport reports the stats thru monkit -func bucketReport(tally *accounting.BucketTally, prefix string) { - monAccounting.IntVal(prefix + ".objects").Observe(tally.ObjectCount) - - monAccounting.IntVal(prefix + ".segments").Observe(tally.Segments()) - monAccounting.IntVal(prefix + ".inline_segments").Observe(tally.InlineSegments) - monAccounting.IntVal(prefix + ".remote_segments").Observe(tally.RemoteSegments) - - monAccounting.IntVal(prefix + ".bytes").Observe(tally.Bytes()) - monAccounting.IntVal(prefix + ".inline_bytes").Observe(tally.InlineBytes) - monAccounting.IntVal(prefix + ".remote_bytes").Observe(tally.RemoteBytes) -} diff --git a/satellite/repair/checker/checker.go b/satellite/repair/checker/checker.go index 50b64efb6..4508ff89d 100644 --- a/satellite/repair/checker/checker.go +++ b/satellite/repair/checker/checker.go @@ -126,11 +126,11 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error) return err } - mon.IntVal("remote_files_checked").Observe(observer.monStats.objectsChecked) - mon.IntVal("remote_segments_checked").Observe(observer.monStats.remoteSegmentsChecked) - mon.IntVal("remote_segments_needing_repair").Observe(observer.monStats.remoteSegmentsNeedingRepair) - mon.IntVal("remote_segments_lost").Observe(observer.monStats.remoteSegmentsLost) - mon.IntVal("remote_files_lost").Observe(int64(len(observer.monStats.remoteSegmentInfo))) + mon.IntVal("remote_files_checked").Observe(observer.monStats.objectsChecked) //locked + mon.IntVal("remote_segments_checked").Observe(observer.monStats.remoteSegmentsChecked) //locked + mon.IntVal("remote_segments_needing_repair").Observe(observer.monStats.remoteSegmentsNeedingRepair) //locked + mon.IntVal("remote_segments_lost").Observe(observer.monStats.remoteSegmentsLost) //locked + mon.IntVal("remote_files_lost").Observe(int64(len(observer.monStats.remoteSegmentInfo))) //locked return nil } @@ -237,11 +237,11 @@ func (obs *checkerObserver) RemoteSegment(ctx context.Context, path metainfo.Sco } numHealthy := int32(len(pieces) - len(missingPieces)) - mon.IntVal("checker_segment_total_count").Observe(int64(len(pieces))) - mon.IntVal("checker_segment_healthy_count").Observe(int64(numHealthy)) + mon.IntVal("checker_segment_total_count").Observe(int64(len(pieces))) //locked + mon.IntVal("checker_segment_healthy_count").Observe(int64(numHealthy)) //locked segmentAge := time.Since(pointer.CreationDate) - mon.IntVal("checker_segment_age").Observe(int64(segmentAge.Seconds())) + mon.IntVal("checker_segment_age").Observe(int64(segmentAge.Seconds())) //locked redundancy := pointer.Remote.Redundancy @@ -294,7 +294,7 @@ func (obs *checkerObserver) RemoteSegment(ctx context.Context, path metainfo.Sco } else { segmentAge = time.Since(pointer.CreationDate) } - mon.IntVal("checker_segment_time_until_irreparable").Observe(int64(segmentAge.Seconds())) + mon.IntVal("checker_segment_time_until_irreparable").Observe(int64(segmentAge.Seconds())) //locked obs.monStats.remoteSegmentsLost++ // make an entry into the irreparable table diff --git a/satellite/satellitedb/overlaycache.go b/satellite/satellitedb/overlaycache.go index ac87373a3..0ad4e5ce2 100644 --- a/satellite/satellitedb/overlaycache.go +++ b/satellite/satellitedb/overlaycache.go @@ -1339,8 +1339,8 @@ func populateUpdateNodeStats(dbNode *dbx.Node, updateReq *overlay.UpdateRequest) updateReq.AuditWeight, dbNode.TotalAuditCount, ) - mon.FloatVal("audit_reputation_alpha").Observe(auditAlpha) - mon.FloatVal("audit_reputation_beta").Observe(auditBeta) + mon.FloatVal("audit_reputation_alpha").Observe(auditAlpha) //locked + mon.FloatVal("audit_reputation_beta").Observe(auditBeta) //locked uptimeAlpha, uptimeBeta, totalUptimeCount := updateReputation( updateReq.IsUp, diff --git a/scripts/check-monitoring.go b/scripts/check-monitoring.go new file mode 100644 index 000000000..7e737dedc --- /dev/null +++ b/scripts/check-monitoring.go @@ -0,0 +1,181 @@ +// Copyright (C) 2019 Storj Labs, Inc. +// See LICENSE for copying information. + +//go:generate go run check-monitoring.go ../monkit.lock + +// +build ignore + +package main + +import ( + "fmt" + "go/ast" + "go/token" + "go/types" + "io/ioutil" + "log" + "os" + "sort" + "strings" + + "golang.org/x/tools/go/packages" +) + +var ( + monkitPath = "gopkg.in/spacemonkeygo/monkit.v2" + lockFilePerms = os.FileMode(0644) +) + +func main() { + pkgs, err := packages.Load(&packages.Config{ + Mode: packages.NeedCompiledGoFiles | packages.NeedSyntax | packages.NeedName | packages.NeedTypes | packages.NeedTypesInfo, + }, "storj.io/storj/...") + if err != nil { + log.Fatalf("error while loading packages: %s", err) + } + + var lockedFnNames []string + for _, pkg := range pkgs { + lockedFnNames = append(lockedFnNames, findLockedFnNames(pkg)...) + } + sortedNames := sortAndUnique(lockedFnNames) + + outputStr := strings.Join(sortedNames, "\n") + if len(os.Args) == 2 { + file := os.Args[1] + if err := ioutil.WriteFile(file, []byte(outputStr+"\n"), lockFilePerms); err != nil { + log.Fatalf("error while writing to file \"%s\": %s", file, err) + } + } else { + fmt.Println(outputStr) + } +} + +func findLockedFnNames(pkg *packages.Package) []string { + var ( + lockedTasksPos []token.Pos + lockedTaskFns []*ast.FuncDecl + lockedFnInfos []string + ) + + // Collect locked comments and what line they are on. + for _, file := range pkg.Syntax { + lockedLines := make(map[int]struct{}) + for _, group := range file.Comments { + for _, comment := range group.List { + if comment.Text == "//locked" { + commentLine := pkg.Fset.Position(comment.Pos()).Line + lockedLines[commentLine] = struct{}{} + } + } + } + if len(lockedLines) == 0 { + continue + } + + // Find calls to monkit functions we're interested in that are on the + // same line as a "locked" comment and keep track of their position. + // NB: always return true to walk entire node tree. + ast.Inspect(file, func(node ast.Node) bool { + if node == nil { + return true + } + if !isMonkitCall(pkg, node) { + return true + } + + // Ensure call line matches a "locked" comment line. + callLine := pkg.Fset.Position(node.End()).Line + if _, ok := lockedLines[callLine]; !ok { + return true + } + + // We are already checking to ensure that these type assertions are valid in `isMonkitCall`. + sel := node.(*ast.CallExpr).Fun.(*ast.SelectorExpr) + + // Track `mon.Task` calls. + if sel.Sel.Name == "Task" { + lockedTasksPos = append(lockedTasksPos, node.End()) + return true + } + + // Track other monkit calls that have one string argument (e.g. monkit.FloatVal, etc.) + // and transform them to representative string. + if len(node.(*ast.CallExpr).Args) != 1 { + return true + } + argLiteral, ok := node.(*ast.CallExpr).Args[0].(*ast.BasicLit) + if !ok { + return true + } + if argLiteral.Kind == token.STRING { + lockedFnInfo := pkg.PkgPath + "." + argLiteral.Value + " " + sel.Sel.Name + lockedFnInfos = append(lockedFnInfos, lockedFnInfo) + } + return true + }) + + // Track all function declarations containing locked `mon.Task` calls. + ast.Inspect(file, func(node ast.Node) bool { + fn, ok := node.(*ast.FuncDecl) + if !ok { + return true + } + for _, locked := range lockedTasksPos { + if fn.Pos() < locked && locked < fn.End() { + lockedTaskFns = append(lockedTaskFns, fn) + } + } + return true + }) + + } + + // Transform the ast.FuncDecls containing locked `mon.Task` calls to representative string. + for _, fn := range lockedTaskFns { + object := pkg.TypesInfo.Defs[fn.Name] + + var receiver string + if fn.Recv != nil { + typ := fn.Recv.List[0].Type + if star, ok := typ.(*ast.StarExpr); ok { + receiver = ".*" + typ = star.X + } else { + receiver = "." + } + recvObj := pkg.TypesInfo.Uses[typ.(*ast.Ident)] + receiver += recvObj.Name() + } + + lockedFnInfo := object.Pkg().Path() + receiver + "." + object.Name() + " Task" + lockedFnInfos = append(lockedFnInfos, lockedFnInfo) + + } + return lockedFnInfos +} + +// isMonkitCall returns whether the node is a call to a function in the monkit package. +func isMonkitCall(pkg *packages.Package, in ast.Node) bool { + defer func() { recover() }() + + ident := in.(*ast.CallExpr).Fun.(*ast.SelectorExpr).X.(*ast.Ident) + + return pkg.TypesInfo.Uses[ident].(*types.Var).Type().(*types.Pointer).Elem().(*types.Named).Obj().Pkg().Path() == monkitPath +} + +func sortAndUnique(input []string) (unique []string) { + set := make(map[string]struct{}) + for _, item := range input { + if _, ok := set[item]; ok { + continue + } else { + set[item] = struct{}{} + } + } + for item := range set { + unique = append(unique, item) + } + sort.Strings(unique) + return unique +}