From 41c5879f7c00e34de15e5a28c84688a4f797b566 Mon Sep 17 00:00:00 2001 From: paul cannon Date: Fri, 6 May 2022 16:04:07 -0500 Subject: [PATCH] satellite: more detailed goroutine labels This will apply an appropriate "subsystem" label to goroutines which are part of the core, api, repairer, admin, or gc subsystems. It will also label goroutines whose job it is to watch for slow shutdown of lifecycle groups (there are a lot of these). Finally, this will also label goroutines whose job it is to wait on the top-level errgroup of a subsystem. Change-Id: I560b5fff4a0101300d6c9a67609c2d80d7424486 --- private/lifecycle/group.go | 4 ++-- satellite/admin.go | 12 +++++++++--- satellite/api.go | 12 +++++++++--- satellite/core.go | 12 +++++++++--- satellite/gc.go | 12 +++++++++--- satellite/repairer.go | 12 +++++++++--- 6 files changed, 47 insertions(+), 17 deletions(-) diff --git a/private/lifecycle/group.go b/private/lifecycle/group.go index 006279490..0c6337af3 100644 --- a/private/lifecycle/group.go +++ b/private/lifecycle/group.go @@ -61,7 +61,7 @@ func (group *Group) Run(ctx context.Context, g *errgroup.Group) { } shutdownCtx, shutdownFinished := context.WithCancel(context.Background()) - go func() { + go pprof.Do(ctx, pprof.Labels("name", "slow_shutdown:"+item.Name), func(ctx context.Context) { select { case <-ctx.Done(): case <-shutdownCtx.Done(): @@ -77,7 +77,7 @@ func (group *Group) Run(ctx context.Context, g *errgroup.Group) { group.logStackTrace() case <-shutdownCtx.Done(): } - }() + }) g.Go(func() error { defer shutdownFinished() diff --git a/satellite/admin.go b/satellite/admin.go index e2bd20f51..7cb610640 100644 --- a/satellite/admin.go +++ b/satellite/admin.go @@ -7,6 +7,7 @@ import ( "context" "errors" "net" + "runtime/pprof" "github.com/spacemonkeygo/monkit/v3" "github.com/zeebo/errs" @@ -190,10 +191,15 @@ func (peer *Admin) Run(ctx context.Context) (err error) { group, ctx := errgroup.WithContext(ctx) - peer.Servers.Run(ctx, group) - peer.Services.Run(ctx,
group) + pprof.Do(ctx, pprof.Labels("subsystem", "admin"), func(ctx context.Context) { + peer.Servers.Run(ctx, group) + peer.Services.Run(ctx, group) - return group.Wait() + pprof.Do(ctx, pprof.Labels("name", "subsystem-wait"), func(ctx context.Context) { + err = group.Wait() + }) + }) + return err } // Close closes all the resources. diff --git a/satellite/api.go b/satellite/api.go index 01f873dd7..d98fd1603 100644 --- a/satellite/api.go +++ b/satellite/api.go @@ -10,6 +10,7 @@ import ( "net" "net/mail" "net/smtp" + "runtime/pprof" "github.com/spacemonkeygo/monkit/v3" "github.com/zeebo/errs" @@ -694,10 +695,15 @@ func (peer *API) Run(ctx context.Context) (err error) { group, ctx := errgroup.WithContext(ctx) - peer.Servers.Run(ctx, group) - peer.Services.Run(ctx, group) + pprof.Do(ctx, pprof.Labels("subsystem", "api"), func(ctx context.Context) { + peer.Servers.Run(ctx, group) + peer.Services.Run(ctx, group) - return group.Wait() + pprof.Do(ctx, pprof.Labels("name", "subsystem-wait"), func(ctx context.Context) { + err = group.Wait() + }) + }) + return err } // Close closes all the resources. diff --git a/satellite/core.go b/satellite/core.go index 1bd601ba6..3dd0ddb54 100644 --- a/satellite/core.go +++ b/satellite/core.go @@ -7,6 +7,7 @@ import ( "context" "errors" "net" + "runtime/pprof" "github.com/spacemonkeygo/monkit/v3" "github.com/zeebo/errs" @@ -534,10 +535,15 @@ func (peer *Core) Run(ctx context.Context) (err error) { group, ctx := errgroup.WithContext(ctx) - peer.Servers.Run(ctx, group) - peer.Services.Run(ctx, group) + pprof.Do(ctx, pprof.Labels("subsystem", "core"), func(ctx context.Context) { + peer.Servers.Run(ctx, group) + peer.Services.Run(ctx, group) - return group.Wait() + pprof.Do(ctx, pprof.Labels("name", "subsystem-wait"), func(ctx context.Context) { + err = group.Wait() + }) + }) + return err } // Close closes all the resources. 
diff --git a/satellite/gc.go b/satellite/gc.go index 1b7e0d1c9..1ce966729 100644 --- a/satellite/gc.go +++ b/satellite/gc.go @@ -7,6 +7,7 @@ import ( "context" "errors" "net" + "runtime/pprof" "github.com/spacemonkeygo/monkit/v3" "github.com/zeebo/errs" @@ -169,10 +170,15 @@ func (peer *GarbageCollection) Run(ctx context.Context) (err error) { group, ctx := errgroup.WithContext(ctx) - peer.Servers.Run(ctx, group) - peer.Services.Run(ctx, group) + pprof.Do(ctx, pprof.Labels("subsystem", "gc"), func(ctx context.Context) { + peer.Servers.Run(ctx, group) + peer.Services.Run(ctx, group) - return group.Wait() + pprof.Do(ctx, pprof.Labels("name", "subsystem-wait"), func(ctx context.Context) { + err = group.Wait() + }) + }) + return err } // Close closes all the resources. diff --git a/satellite/repairer.go b/satellite/repairer.go index 9f348d4e4..cf074867e 100644 --- a/satellite/repairer.go +++ b/satellite/repairer.go @@ -7,6 +7,7 @@ import ( "context" "errors" "net" + "runtime/pprof" "github.com/spacemonkeygo/monkit/v3" "github.com/zeebo/errs" @@ -244,10 +245,15 @@ func (peer *Repairer) Run(ctx context.Context) (err error) { group, ctx := errgroup.WithContext(ctx) - peer.Servers.Run(ctx, group) - peer.Services.Run(ctx, group) + pprof.Do(ctx, pprof.Labels("subsystem", "repairer"), func(ctx context.Context) { + peer.Servers.Run(ctx, group) + peer.Services.Run(ctx, group) - return group.Wait() + pprof.Do(ctx, pprof.Labels("name", "subsystem-wait"), func(ctx context.Context) { + err = group.Wait() + }) + }) + return err } // Close closes all the resources.