satellite: more detailed goroutine labels

This applies an appropriate "subsystem" pprof label to goroutines which
are part of the core, api, repairer, admin, or gc subsystems.

It will also label goroutines whose job it is to watch for slow shutdown
of lifecycle groups (there are a lot of these).

Finally, this will also label goroutines whose job it is to wait on the
top-level errgroup of a subsystem.

Change-Id: I560b5fff4a0101300d6c9a67609c2d80d7424486
This commit is contained in:
paul cannon 2022-05-06 16:04:07 -05:00 committed by Ivan Fraixedes
parent 89cc544b34
commit 41c5879f7c
6 changed files with 47 additions and 17 deletions

View File

@ -61,7 +61,7 @@ func (group *Group) Run(ctx context.Context, g *errgroup.Group) {
}
shutdownCtx, shutdownFinished := context.WithCancel(context.Background())
go func() {
go pprof.Do(ctx, pprof.Labels("name", "slow_shutdown:"+item.Name), func(ctx context.Context) {
select {
case <-ctx.Done():
case <-shutdownCtx.Done():
@ -77,7 +77,7 @@ func (group *Group) Run(ctx context.Context, g *errgroup.Group) {
group.logStackTrace()
case <-shutdownCtx.Done():
}
}()
})
g.Go(func() error {
defer shutdownFinished()

View File

@ -7,6 +7,7 @@ import (
"context"
"errors"
"net"
"runtime/pprof"
"github.com/spacemonkeygo/monkit/v3"
"github.com/zeebo/errs"
@ -190,10 +191,15 @@ func (peer *Admin) Run(ctx context.Context) (err error) {
group, ctx := errgroup.WithContext(ctx)
peer.Servers.Run(ctx, group)
peer.Services.Run(ctx, group)
pprof.Do(ctx, pprof.Labels("subsystem", "admin"), func(ctx context.Context) {
peer.Servers.Run(ctx, group)
peer.Services.Run(ctx, group)
return group.Wait()
pprof.Do(ctx, pprof.Labels("name", "subsystem-wait"), func(ctx context.Context) {
err = group.Wait()
})
})
return err
}
// Close closes all the resources.

View File

@ -10,6 +10,7 @@ import (
"net"
"net/mail"
"net/smtp"
"runtime/pprof"
"github.com/spacemonkeygo/monkit/v3"
"github.com/zeebo/errs"
@ -694,10 +695,15 @@ func (peer *API) Run(ctx context.Context) (err error) {
group, ctx := errgroup.WithContext(ctx)
peer.Servers.Run(ctx, group)
peer.Services.Run(ctx, group)
pprof.Do(ctx, pprof.Labels("subsystem", "api"), func(ctx context.Context) {
peer.Servers.Run(ctx, group)
peer.Services.Run(ctx, group)
return group.Wait()
pprof.Do(ctx, pprof.Labels("name", "subsystem-wait"), func(ctx context.Context) {
err = group.Wait()
})
})
return err
}
// Close closes all the resources.

View File

@ -7,6 +7,7 @@ import (
"context"
"errors"
"net"
"runtime/pprof"
"github.com/spacemonkeygo/monkit/v3"
"github.com/zeebo/errs"
@ -534,10 +535,15 @@ func (peer *Core) Run(ctx context.Context) (err error) {
group, ctx := errgroup.WithContext(ctx)
peer.Servers.Run(ctx, group)
peer.Services.Run(ctx, group)
pprof.Do(ctx, pprof.Labels("subsystem", "core"), func(ctx context.Context) {
peer.Servers.Run(ctx, group)
peer.Services.Run(ctx, group)
return group.Wait()
pprof.Do(ctx, pprof.Labels("name", "subsystem-wait"), func(ctx context.Context) {
err = group.Wait()
})
})
return err
}
// Close closes all the resources.

View File

@ -7,6 +7,7 @@ import (
"context"
"errors"
"net"
"runtime/pprof"
"github.com/spacemonkeygo/monkit/v3"
"github.com/zeebo/errs"
@ -169,10 +170,15 @@ func (peer *GarbageCollection) Run(ctx context.Context) (err error) {
group, ctx := errgroup.WithContext(ctx)
peer.Servers.Run(ctx, group)
peer.Services.Run(ctx, group)
pprof.Do(ctx, pprof.Labels("subsystem", "gc"), func(ctx context.Context) {
peer.Servers.Run(ctx, group)
peer.Services.Run(ctx, group)
return group.Wait()
pprof.Do(ctx, pprof.Labels("name", "subsystem-wait"), func(ctx context.Context) {
err = group.Wait()
})
})
return err
}
// Close closes all the resources.

View File

@ -7,6 +7,7 @@ import (
"context"
"errors"
"net"
"runtime/pprof"
"github.com/spacemonkeygo/monkit/v3"
"github.com/zeebo/errs"
@ -244,10 +245,15 @@ func (peer *Repairer) Run(ctx context.Context) (err error) {
group, ctx := errgroup.WithContext(ctx)
peer.Servers.Run(ctx, group)
peer.Services.Run(ctx, group)
pprof.Do(ctx, pprof.Labels("subsystem", "repairer"), func(ctx context.Context) {
peer.Servers.Run(ctx, group)
peer.Services.Run(ctx, group)
return group.Wait()
pprof.Do(ctx, pprof.Labels("name", "subsystem-wait"), func(ctx context.Context) {
err = group.Wait()
})
})
return err
}
// Close closes all the resources.