Skip to content

Commit e4c1a7a

Browse files
authored
Tracing: Standardize on otel tracing (grafana#75528)
1 parent 4432c4c commit e4c1a7a

File tree

46 files changed

+321
-439
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+321
-439
lines changed

contribute/backend/instrumentation.md

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ A distributed trace is data that tracks an application request as it flows throu
156156

157157
### Usage
158158

159-
Grafana currently supports two tracing implementations, [OpenTelemetry](https://opentelemetry.io/) and [OpenTracing](https://opentracing.io/). OpenTracing is deprecated, but still supported until we remove it. The two different implementations implements the `Tracer` and `Span` interfaces, defined in the _pkg/infra/tracing_ package, which you can use to create traces and spans. To get a hold of a `Tracer` you would need to get it injected as dependency into your service, see [Services](services.md) for more details.
159+
Grafana uses [OpenTelemetry](https://opentelemetry.io/) for distributed tracing. There's an interface `Tracer` in the _pkg/infra/tracing_ package that implements the [OpenTelemetry Tracer interface](https://pkg.go.dev/go.opentelemetry.io/otel/trace), which you can use to create traces and spans. To get hold of a `Tracer`, you need to have it injected as a dependency into your service; see [Services](services.md) for more details. For more information, see https://opentelemetry.io/docs/instrumentation/go/manual/.
160160

161161
Example:
162162

@@ -166,6 +166,7 @@ import (
166166

167167
"github.com/grafana/grafana/pkg/infra/tracing"
168168
"go.opentelemetry.io/otel/attribute"
169+
"go.opentelemetry.io/otel/trace"
169170
)
170171

171172
type MyService struct {
@@ -179,36 +180,36 @@ func ProvideService(tracer tracing.Tracer) *MyService {
179180
}
180181

181182
func (s *MyService) Hello(ctx context.Context, name string) (string, error) {
182-
ctx, span := s.tracer.Start(ctx, "MyService.Hello")
183+
ctx, span := s.tracer.Start(ctx, "MyService.Hello", trace.WithAttributes(
184+
attribute.String("my_attribute", "val"),
185+
))
183186
// this makes sure the span is marked as finished when this
184187
// method ends to allow the span to be flushed and sent to
185188
// storage backend.
186189
defer span.End()
187190

188191
// Add some event to show Events usage
189-
span.AddEvents(
190-
[]string{"message"},
191-
[]tracing.EventValue{
192-
{Str: "checking name..."},
193-
})
192+
span.AddEvent("checking name...")
194193

195194
if name == "" {
196195
err := fmt.Errorf("name cannot be empty")
197196

197+
// sets the span's status to Error so that a span tracking
198+
// a failed operation is marked as an error span.
199+
span.SetStatus(codes.Error, "failed to check name")
198200
// record err as an exception span event for this span
199201
span.RecordError(err)
200202
return "", err
201203
}
202204

203205
// Add some other event to show Events usage
204-
span.AddEvents(
205-
[]string{"message"},
206-
[]tracing.EventValue{
207-
{Str: "name checked"},
208-
})
206+
span.AddEvent("name checked")
209207

210208
// Add attribute to show Attributes usage
211-
span.SetAttributes("my_service.name", name, attribute.Key("my_service.name").String(name))
209+
span.SetAttributes(
210+
attribute.String("my_service.name", name),
211+
attribute.Int64("my_service.some_other", int64(1337)),
212+
)
212213

213214
return fmt.Sprintf("Hello %s", name), nil
214215
}
@@ -243,6 +244,22 @@ If span names, attribute or event values originates from user input they **shoul
243244

244245
Be **careful** not to expose any sensitive information in span names, attribute or event values, e.g. secrets, credentials, etc.
245246

247+
### Span attributes
248+
249+
Consider using `attribute.<Type>("<key>", <value>)` in favor of `attribute.Key("<key>").<Type>(<value>)` since it requires fewer characters and is therefore easier to read.
250+
251+
Example:
252+
253+
```go
254+
attribute.String("datasource_name", proxy.ds.Name)
255+
// vs
256+
attribute.Key("datasource_name").String(proxy.ds.Name)
257+
258+
attribute.Int64("org_id", proxy.ctx.SignedInUser.OrgID)
259+
// vs
260+
attribute.Key("org_id").Int64(proxy.ctx.SignedInUser.OrgID)
261+
```
262+
246263
### How to collect, visualize and query traces (and correlate logs with traces) locally
247264

248265
#### 1. Start Jaeger
@@ -255,20 +272,11 @@ make devenv sources=jaeger
255272

256273
To enable tracing in Grafana, you must set the address in your config.ini file:
257274

258-
opentelemetry tracing (recommended):
259-
260275
```ini
261276
[tracing.opentelemetry.jaeger]
262277
address = http://localhost:14268/api/traces
263278
```
264279

265-
opentracing tracing (deprecated/not recommended):
266-
267-
```ini
268-
[tracing.jaeger]
269-
address = localhost:6831
270-
```
271-
272280
#### 3. Search/browse collected logs and traces in Grafana Explore
273281

274282
You need provisioned gdev-jaeger and gdev-loki datasources, see [developer dashboard and data sources](https://github.com/grafana/grafana/tree/main/devenv#developer-dashboards-and-data-sources) for setup instructions.

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ require (
121121
gopkg.in/mail.v2 v2.3.1 // @grafana/backend-platform
122122
gopkg.in/yaml.v2 v2.4.0 // indirect
123123
gopkg.in/yaml.v3 v3.0.1 // @grafana/alerting-squad-backend
124-
xorm.io/builder v0.3.6 // indirect; @grafana/backend-platform
124+
xorm.io/builder v0.3.6 // @grafana/backend-platform
125125
xorm.io/core v0.7.3 // @grafana/backend-platform
126126
xorm.io/xorm v0.8.2 // @grafana/alerting-squad-backend
127127
)
@@ -174,7 +174,7 @@ require (
174174
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.1-0.20191002090509-6af20e3a5340 // indirect
175175
github.com/hashicorp/errwrap v1.1.0 // indirect
176176
github.com/hashicorp/go-msgpack v0.5.5 // indirect
177-
github.com/hashicorp/go-multierror v1.1.1 // indirect; @grafana/grafana-as-code
177+
github.com/hashicorp/go-multierror v1.1.1 // @grafana/grafana-as-code
178178
github.com/hashicorp/go-sockaddr v1.0.2 // indirect
179179
github.com/hashicorp/golang-lru v0.6.0 // indirect
180180
github.com/hashicorp/yamux v0.1.1 // indirect

pkg/api/pluginproxy/ds_proxy.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"time"
1313

1414
"go.opentelemetry.io/otel/attribute"
15+
"go.opentelemetry.io/otel/trace"
1516

1617
"github.com/grafana/grafana/pkg/api/datasource"
1718
"github.com/grafana/grafana/pkg/infra/httpclient"
@@ -142,10 +143,12 @@ func (proxy *DataSourceProxy) HandleRequest() {
142143

143144
proxy.ctx.Req = proxy.ctx.Req.WithContext(ctx)
144145

145-
span.SetAttributes("datasource_name", proxy.ds.Name, attribute.Key("datasource_name").String(proxy.ds.Name))
146-
span.SetAttributes("datasource_type", proxy.ds.Type, attribute.Key("datasource_type").String(proxy.ds.Type))
147-
span.SetAttributes("user", proxy.ctx.SignedInUser.Login, attribute.Key("user").String(proxy.ctx.SignedInUser.Login))
148-
span.SetAttributes("org_id", proxy.ctx.SignedInUser.OrgID, attribute.Key("org_id").Int64(proxy.ctx.SignedInUser.OrgID))
146+
span.SetAttributes(
147+
attribute.String("datasource_name", proxy.ds.Name),
148+
attribute.String("datasource_type", proxy.ds.Type),
149+
attribute.String("user", proxy.ctx.SignedInUser.Login),
150+
attribute.Int64("org_id", proxy.ctx.SignedInUser.OrgID),
151+
)
149152

150153
proxy.addTraceFromHeaderValue(span, "X-Panel-Id", "panel_id")
151154
proxy.addTraceFromHeaderValue(span, "X-Dashboard-Id", "dashboard_id")
@@ -155,11 +158,11 @@ func (proxy *DataSourceProxy) HandleRequest() {
155158
reverseProxy.ServeHTTP(proxy.ctx.Resp, proxy.ctx.Req)
156159
}
157160

158-
func (proxy *DataSourceProxy) addTraceFromHeaderValue(span tracing.Span, headerName string, tagName string) {
161+
func (proxy *DataSourceProxy) addTraceFromHeaderValue(span trace.Span, headerName string, tagName string) {
159162
panelId := proxy.ctx.Req.Header.Get(headerName)
160163
dashId, err := strconv.Atoi(panelId)
161164
if err == nil {
162-
span.SetAttributes(tagName, dashId, attribute.Key(tagName).Int(dashId))
165+
span.SetAttributes(attribute.Int(tagName, dashId))
163166
}
164167
}
165168

pkg/api/pluginproxy/pluginproxy.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,10 @@ func (proxy *PluginProxy) HandleRequest() {
109109

110110
proxy.ctx.Req = proxy.ctx.Req.WithContext(ctx)
111111

112-
span.SetAttributes("user", proxy.ctx.SignedInUser.Login, attribute.Key("user").String(proxy.ctx.SignedInUser.Login))
113-
span.SetAttributes("org_id", proxy.ctx.SignedInUser.OrgID, attribute.Key("org_id").Int64(proxy.ctx.SignedInUser.OrgID))
112+
span.SetAttributes(
113+
attribute.String("user", proxy.ctx.SignedInUser.Login),
114+
attribute.Int64("org_id", proxy.ctx.SignedInUser.OrgID),
115+
)
114116

115117
proxy.tracer.Inject(ctx, proxy.ctx.Req.Header, span)
116118

pkg/bus/bus.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func (b *InProcBus) Publish(ctx context.Context, msg Msg) error {
5555
_, span := b.tracer.Start(ctx, "bus - "+msgName)
5656
defer span.End()
5757

58-
span.SetAttributes("msg", msgName, attribute.Key("msg").String(msgName))
58+
span.SetAttributes(attribute.String("msg", msgName))
5959

6060
return nil
6161
}

pkg/expr/commands.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ func (gm *MathCommand) NeedsVars() []string {
7070
// failed to execute.
7171
func (gm *MathCommand) Execute(ctx context.Context, _ time.Time, vars mathexp.Vars, tracer tracing.Tracer) (mathexp.Results, error) {
7272
_, span := tracer.Start(ctx, "SSE.ExecuteMath")
73-
span.SetAttributes("expression", gm.RawExpression, attribute.Key("expression").String(gm.RawExpression))
73+
span.SetAttributes(attribute.String("expression", gm.RawExpression))
7474
defer span.End()
7575
return gm.Expression.Execute(gm.refID, vars, tracer)
7676
}
@@ -163,7 +163,7 @@ func (gr *ReduceCommand) Execute(ctx context.Context, _ time.Time, vars mathexp.
163163
_, span := tracer.Start(ctx, "SSE.ExecuteReduce")
164164
defer span.End()
165165

166-
span.SetAttributes("reducer", gr.Reducer, attribute.Key("reducer").String(gr.Reducer))
166+
span.SetAttributes(attribute.String("reducer", gr.Reducer))
167167

168168
newRes := mathexp.Results{}
169169
for i, val := range vars[gr.VarToReduce].Values {

pkg/expr/dataplane.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ func shouldUseDataplane(frames data.Frames, logger log.Logger, disable bool) (dt
6161
func handleDataplaneFrames(ctx context.Context, tracer tracing.Tracer, t data.FrameType, frames data.Frames) (mathexp.Results, error) {
6262
_, span := tracer.Start(ctx, "SSE.HandleDataPlaneData")
6363
defer span.End()
64-
span.SetAttributes("dataplane.type", t, attribute.Key("dataplane.type").String(string(t)))
64+
span.SetAttributes(attribute.String("dataplane.type", string(t)))
6565

6666
switch t.Kind() {
6767
case data.KindUnknown:

pkg/expr/graph.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,10 @@ func (dp *DataPipeline) execute(c context.Context, now time.Time, s *Service) (m
9999
}
100100

101101
c, span := s.tracer.Start(c, "SSE.ExecuteNode")
102-
span.SetAttributes("node.refId", node.RefID(), attribute.Key("node.refId").String(node.RefID()))
102+
span.SetAttributes(attribute.String("node.refId", node.RefID()))
103103
if len(node.NeedsVars()) > 0 {
104104
inputRefIDs := node.NeedsVars()
105-
span.SetAttributes("node.inputRefIDs", inputRefIDs, attribute.Key("node.inputRefIDs").StringSlice(inputRefIDs))
105+
span.SetAttributes(attribute.StringSlice("node.inputRefIDs", inputRefIDs))
106106
}
107107
defer span.End()
108108

pkg/expr/nodes.go

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@ import (
1010
"github.com/grafana/grafana-plugin-sdk-go/backend"
1111
"github.com/grafana/grafana-plugin-sdk-go/data"
1212
"go.opentelemetry.io/otel/attribute"
13+
"go.opentelemetry.io/otel/codes"
1314
"gonum.org/v1/gonum/graph/simple"
1415

1516
"github.com/grafana/grafana/pkg/expr/classic"
1617
"github.com/grafana/grafana/pkg/expr/mathexp"
1718
"github.com/grafana/grafana/pkg/infra/log"
18-
"github.com/grafana/grafana/pkg/infra/tracing"
1919
"github.com/grafana/grafana/pkg/services/datasources"
2020
"github.com/grafana/grafana/pkg/services/featuremgmt"
2121
)
@@ -236,8 +236,10 @@ func executeDSNodesGrouped(ctx context.Context, now time.Time, vars mathexp.Vars
236236
"datasourceVersion", firstNode.datasource.Version,
237237
)
238238

239-
span.SetAttributes("datasource.type", firstNode.datasource.Type, attribute.Key("datasource.type").String(firstNode.datasource.Type))
240-
span.SetAttributes("datasource.uid", firstNode.datasource.UID, attribute.Key("datasource.uid").String(firstNode.datasource.UID))
239+
span.SetAttributes(
240+
attribute.String("datasource.type", firstNode.datasource.Type),
241+
attribute.String("datasource.uid", firstNode.datasource.UID),
242+
)
241243

242244
req := &backend.QueryDataRequest{
243245
PluginContext: pCtx,
@@ -261,11 +263,8 @@ func executeDSNodesGrouped(ctx context.Context, now time.Time, vars mathexp.Vars
261263
if e != nil {
262264
responseType = "error"
263265
respStatus = "failure"
264-
span.AddEvents([]string{"error", "message"},
265-
[]tracing.EventValue{
266-
{Str: fmt.Sprintf("%v", err)},
267-
{Str: "failed to query data source"},
268-
})
266+
span.SetStatus(codes.Error, "failed to query data source")
267+
span.RecordError(e)
269268
}
270269
logger.Debug("Data source queried", "responseType", responseType)
271270
useDataplane := strings.HasPrefix(responseType, "dataplane-")
@@ -313,8 +312,10 @@ func (dn *DSNode) Execute(ctx context.Context, now time.Time, _ mathexp.Vars, s
313312
if err != nil {
314313
return mathexp.Results{}, err
315314
}
316-
span.SetAttributes("datasource.type", dn.datasource.Type, attribute.Key("datasource.type").String(dn.datasource.Type))
317-
span.SetAttributes("datasource.uid", dn.datasource.UID, attribute.Key("datasource.uid").String(dn.datasource.UID))
315+
span.SetAttributes(
316+
attribute.String("datasource.type", dn.datasource.Type),
317+
attribute.String("datasource.uid", dn.datasource.UID),
318+
)
318319

319320
req := &backend.QueryDataRequest{
320321
PluginContext: pCtx,
@@ -337,11 +338,8 @@ func (dn *DSNode) Execute(ctx context.Context, now time.Time, _ mathexp.Vars, s
337338
if e != nil {
338339
responseType = "error"
339340
respStatus = "failure"
340-
span.AddEvents([]string{"error", "message"},
341-
[]tracing.EventValue{
342-
{Str: fmt.Sprintf("%v", err)},
343-
{Str: "failed to query data source"},
344-
})
341+
span.SetStatus(codes.Error, "failed to query data source")
342+
span.RecordError(e)
345343
}
346344
logger.Debug("Data source queried", "responseType", responseType)
347345
useDataplane := strings.HasPrefix(responseType, "dataplane-")

pkg/infra/httpclient/httpclientprovider/tracing_middleware.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace"
1111
"go.opentelemetry.io/otel/attribute"
1212
"go.opentelemetry.io/otel/codes"
13+
semconv "go.opentelemetry.io/otel/semconv/v1.17.0"
1314
"go.opentelemetry.io/otel/trace"
1415

1516
"github.com/grafana/grafana/pkg/infra/log"
@@ -30,17 +31,18 @@ func TracingMiddleware(logger log.Logger, tracer tracing.Tracer) httpclient.Midd
3031
ctx = httptrace.WithClientTrace(ctx, otelhttptrace.NewClientTrace(ctx, otelhttptrace.WithoutSubSpans(), otelhttptrace.WithoutHeaders()))
3132
req = req.WithContext(ctx)
3233
for k, v := range opts.Labels {
33-
span.SetAttributes(k, v, attribute.Key(k).String(v))
34+
span.SetAttributes(attribute.String(k, v))
3435
}
3536

3637
tracer.Inject(ctx, req.Header, span)
3738
res, err := next.RoundTrip(req)
3839

39-
span.SetAttributes("http.url", req.URL.String(), attribute.String("http.url", req.URL.String()))
40-
span.SetAttributes("http.method", req.Method, attribute.String("http.method", req.Method))
40+
span.SetAttributes(semconv.HTTPURL(req.URL.String()))
41+
span.SetAttributes(semconv.HTTPMethod(req.Method))
4142
// ext.SpanKind.Set(span, ext.SpanKindRPCClientEnum)
4243

4344
if err != nil {
45+
span.SetStatus(codes.Error, "request failed")
4446
span.RecordError(err)
4547
return res, err
4648
}
@@ -49,10 +51,10 @@ func TracingMiddleware(logger log.Logger, tracer tracing.Tracer) httpclient.Midd
4951
// we avoid measuring contentlength less than zero because it indicates
5052
// that the content size is unknown. https://godoc.org/github.com/badu/http#Response
5153
if res.ContentLength > 0 {
52-
span.SetAttributes(httpContentLengthTagKey, res.ContentLength, attribute.Key(httpContentLengthTagKey).Int64(res.ContentLength))
54+
span.SetAttributes(attribute.Int64(httpContentLengthTagKey, res.ContentLength))
5355
}
5456

55-
span.SetAttributes("http.status_code", res.StatusCode, attribute.Int("http.status_code", res.StatusCode))
57+
span.SetAttributes(semconv.HTTPStatusCode(res.StatusCode))
5658
if res.StatusCode >= 400 {
5759
span.SetStatus(codes.Error, fmt.Sprintf("error with HTTP status code %s", strconv.Itoa(res.StatusCode)))
5860
}

0 commit comments

Comments
 (0)