cmd/uplink: support --trace-tags for custom tags

We regularly test by executing uplink, but the recorded execution time sometimes shows spikes.

It would be nice to know the reason for the spikes (just an internet blip, or something we should be worried about).

We can collect distributed traces, but it's not easy to find the right trace in Jaeger.

 * We can provide a random trace-id, but it should be persisted / processed
 * We can also save standard output and use `--trace-verbose` which prints out the used trace id, but it's also complicated to collect all of them in a DB

It would be nice to attach additional metadata to traces, so that we can filter all traces of one specific kind of test.

This patch provides this feature:

  * It always adds hostname to the trace (if you opt-in to distributed tracing, which is turned off by default)
  * Additional tags can be defined with CLI flag

Tags can be used to find the right trace in Jaeger (or in the Elasticsearch backend of Jaeger).

Change-Id: I08f10023bbebd783f812cfca95ac6237360ac2b0
This commit is contained in:
Márton Elek 2023-03-09 11:42:08 +01:00 committed by Storj Robot
parent c24341bcab
commit 0b66d22be4

View File

@ -59,10 +59,11 @@ type external struct {
}
tracing struct {
traceID int64 // if non-zero, sets outgoing traces to the given id
traceAddress string // if non-zero, sampled spans are sent to this trace collector address.
sample float64 // the chance (number between 0 and 1.0) to send samples to the server.
verbose bool // flag to print out tracing information (like the used trace id)
traceID int64 // if non-zero, sets outgoing traces to the given id
traceAddress string // if non-zero, sampled spans are sent to this trace collector address.
tags map[string]string // coma separated k=v pairs to be added to the trace
sample float64 // the chance (number between 0 and 1.0) to send samples to the server.
verbose bool // flag to print out tracing information (like the used trace id)
}
debug struct {
@ -121,6 +122,19 @@ func (ex *external) Setup(f clingy.Flags) {
clingy.Advanced,
).(string)
// Register the advanced --trace-tags flag. Its value is a list of k=v pairs
// separated by commas; the transform below turns it into a map that is
// stored in ex.tracing.tags.
ex.tracing.tags = f.Flag(
	"trace-tags", "coma separated k=v pairs to be added to distributed traces", map[string]string{},
	clingy.Advanced,
	clingy.Transform(func(val string) (map[string]string, error) {
		res := map[string]string{}
		for _, kv := range strings.Split(val, ",") {
			// Skip empty segments (empty flag value, doubled or trailing
			// commas) instead of recording a tag with an empty key.
			if kv == "" {
				continue
			}
			// Split on the first "=" only, so values may contain "=".
			parts := strings.SplitN(kv, "=", 2)
			if len(parts) < 2 {
				// No "=" in the segment: indexing parts[1] would panic, so
				// treat it as a bare key with an empty value.
				res[parts[0]] = ""
				continue
			}
			res[parts[0]] = parts[1]
		}
		return res, nil
	}),
).(map[string]string)
ex.events.address = f.Flag(
"events-addr", "Specify where to send events", "eventkitd.datasci.storj.io:9002",
clingy.Advanced,
@ -315,6 +329,15 @@ func (ex *external) Wrap(ctx context.Context, cmd clingy.Command) (err error) {
defer mon.Func().RemoteTrace(&ctx, monkit.NewId(), trace)(&err)
}
// Register a trace observer that annotates traces with metadata that can
// later be used to filter them: the local hostname (only when it can be
// determined) and every user-supplied pair from ex.tracing.tags.
// NOTE(review): presumably monkit invokes the callback once per newly
// created trace — confirm against the monkit documentation.
monkit.Default.ObserveTraces(func(trace *monkit.Trace) {
// A failed hostname lookup is not reported; the tag is just left out.
if hn, err := os.Hostname(); err == nil {
trace.Set("hostname", hn)
}
// Copy the custom tags collected from the --trace-tags flag onto the trace.
for k, v := range ex.tracing.tags {
trace.Set(k, v)
}
})
}
if ex.analyticsEnabled() && ex.events.address != "" {