ddtrace/tracer: update log msg to accurately count dropped traces #2845

Merged: 19 commits, Sep 24, 2024
Changes from 8 commits
1 change: 1 addition & 0 deletions ddtrace/tracer/metrics_test.go
@@ -61,6 +61,7 @@ func TestReportHealthMetrics(t *testing.T) {
     assert.Equal(int64(1), counts["datadog.tracer.spans_started"])
     assert.Equal(int64(1), counts["datadog.tracer.spans_finished"])
     assert.Equal(int64(0), counts["datadog.tracer.traces_dropped"])
+    assert.Equal(int64(0), counts["datadog.tracer.total_traces_dropped"])
 }

 func TestTracerMetrics(t *testing.T) {
6 changes: 6 additions & 0 deletions ddtrace/tracer/spancontext.go
@@ -266,6 +266,7 @@ type trace struct {
     priority *float64 // sampling priority
     locked bool // specifies if the sampling priority can be altered
     samplingDecision samplingDecision // samplingDecision indicates whether to send the trace to the agent.
+    dropped bool // specifies if part or all of the trace has been dropped

     // root specifies the root of the trace, if known; it is nil when a span
     // context is extracted from a carrier, at which point there are no spans in
@@ -483,6 +484,8 @@ func (t *trace) finishedOne(s *span) {
         t.finishChunk(tr, &chunk{
             spans: t.spans,
             willSend: decisionKeep == samplingDecision(atomic.LoadUint32((*uint32)(&t.samplingDecision))),
+            traceID: s.TraceID,
+            dropped: t.dropped,
         })
         t.spans = nil
         return
@@ -514,13 +517,16 @@ func (t *trace) finishedOne(s *span) {
     t.finishChunk(tr, &chunk{
         spans: finishedSpans,
         willSend: decisionKeep == samplingDecision(atomic.LoadUint32((*uint32)(&t.samplingDecision))),
+        traceID: s.TraceID,
+        dropped: t.dropped,
     })
     t.spans = leftoverSpans
 }

 func (t *trace) finishChunk(tr *tracer, ch *chunk) {
     atomic.AddUint32(&tr.spansFinished, uint32(len(ch.spans)))
     tr.pushChunk(ch)
+    t.dropped = ch.dropped
     t.finished = 0 // important, because a buffer can be used for several flushes
 }

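Taken together with the pushChunk change later in this diff, the copy-back in finishChunk is what keeps a trace that flushes in several chunks from being counted more than once: the push path sets the chunk's dropped flag the first time the queue is full, and finishChunk writes that flag back onto the trace so later chunks inherit it. The following is a minimal, self-contained sketch of that round trip; the types and helpers are simplified stand-ins, not the tracer's real ones.

package main

import "fmt"

// Simplified stand-ins for the tracer's trace and chunk types.
type chunk struct{ dropped bool }
type trace struct{ dropped bool }

// pushChunk mimics the queue-full path: count the trace only the first time.
func pushChunk(ch *chunk, queueFull bool, total *int) {
    if queueFull && !ch.dropped {
        *total++
        ch.dropped = true
    }
}

// finishChunk mimics trace.finishChunk: seed the chunk with the trace's flag,
// push it, then copy the (possibly updated) flag back onto the trace.
func (t *trace) finishChunk(ch *chunk, queueFull bool, total *int) {
    ch.dropped = t.dropped
    pushChunk(ch, queueFull, total)
    t.dropped = ch.dropped
}

func main() {
    total := 0
    tr := &trace{}
    tr.finishChunk(&chunk{}, true, &total) // queue full: counted once
    tr.finishChunk(&chunk{}, true, &total) // still full: not counted again
    fmt.Println(total) // prints 1
}

Running the sketch prints 1, which mirrors the expectation asserted by the new TestPushTrace/overload subtest further down.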
22 changes: 21 additions & 1 deletion ddtrace/tracer/tracer.go
@@ -81,6 +81,12 @@ type tracer struct {
     // finished, and dropped
     spansStarted, spansFinished, tracesDropped uint32

+    // Keeps track of the total number of traces dropped for accurate logging.
+    totalTracesDropped struct {
+        mu sync.Mutex
+        count uint32
+    }
+
     // Records the number of dropped P0 traces and spans.
     droppedP0Traces, droppedP0Spans uint32
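The new field bundles the counter with its mutex in one anonymous struct, so the count is only ever read or written with the lock held. A small, self-contained sketch of the same pattern, using illustrative names rather than the library's:

package main

import (
    "fmt"
    "sync"
)

// droppedCounter mirrors the shape of totalTracesDropped: the mutex lives
// next to the count it protects.
type droppedCounter struct {
    mu    sync.Mutex
    count uint32
}

func (c *droppedCounter) inc() {
    c.mu.Lock()
    defer c.mu.Unlock()
    c.count++
}

func main() {
    var c droppedCounter
    var wg sync.WaitGroup
    for i := 0; i < 100; i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            c.inc() // safe to call from many goroutines
        }()
    }
    wg.Wait()
    fmt.Println(c.count) // always 100
}

Keeping the mutex next to the data it guards makes the locking discipline visible at the declaration site.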

@@ -401,6 +407,12 @@ func (t *tracer) worker(tick <-chan time.Time) {
             }
             return
         }
+
+        t.totalTracesDropped.mu.Lock()
+        if t.totalTracesDropped.count > 0 {
+            log.Error("%d traces dropped through payload queue", t.totalTracesDropped.count)
+        }
+        t.totalTracesDropped.mu.Unlock()
     }
 }

@@ -410,6 +422,8 @@ func (t *tracer) worker(tick <-chan time.Time) {
 type chunk struct {
     spans []*span
     willSend bool // willSend indicates whether the trace will be sent to the agent.
+    traceID uint64
+    dropped bool // indicates if the parent trace has already been dropped
 }

 // sampleChunk applies single-span sampling to the provided trace.
@@ -451,7 +465,13 @@ func (t *tracer) pushChunk(trace *chunk) {
     select {
     case t.out <- trace:
     default:
-        log.Error("payload queue full, dropping %d traces", len(trace.spans))
+        log.Debug("payload queue full, trace %d dropped %d spans", trace.traceID, len(trace.spans))
+        if !trace.dropped {
+            t.totalTracesDropped.mu.Lock()
+            defer t.totalTracesDropped.mu.Unlock()
+            t.totalTracesDropped.count += 1
+            trace.dropped = true
+        }
     }
 }

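For reference, this is roughly how the two new messages render, given the format strings above; the values are invented and the tracer's logger adds its own prefix and level:

package main

import "fmt"

func main() {
    // Per-occurrence message, now logged at debug level from pushChunk.
    fmt.Printf("payload queue full, trace %d dropped %d spans\n", uint64(42), 3)
    // Aggregated message, logged at error level from the worker.
    fmt.Printf("%d traces dropped through payload queue\n", 5)
}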
25 changes: 23 additions & 2 deletions ddtrace/tracer/tracer_test.go
@@ -1591,11 +1591,11 @@ func TestPushPayload(t *testing.T) {
     s.Meta["key"] = strings.Repeat("X", payloadSizeLimit/2+10)

     // half payload size reached
-    tracer.pushChunk(&chunk{[]*span{s}, true})
+    tracer.pushChunk(&chunk{[]*span{s}, true, s.TraceID, false})
     tracer.awaitPayload(t, 1)

     // payload size exceeded
-    tracer.pushChunk(&chunk{[]*span{s}, true})
+    tracer.pushChunk(&chunk{[]*span{s}, true, s.TraceID, false})
     flush(2)
 }

@@ -1632,6 +1632,27 @@ func TestPushTrace(t *testing.T) {
     assert.Len(tracer.out, payloadQueueSize)
     log.Flush()
     assert.True(len(tp.Logs()) >= 1)
+
+    t.Run("overload", func(t *testing.T) {
+        tracer := newUnstartedTracer()
+        defer tracer.statsd.Close()
+
+        root := newSpan("name", "service", "resource", 0, 0, 0)
+        trace := root.context.trace
+
+        many := payloadQueueSize + 2
+        for i := 0; i < many; i++ {
+            trace.mu.Lock()
+            c := chunk{spans: make([]*span, i),
+                traceID: trace.root.TraceID,
+                dropped: trace.dropped,
+            }
+            tracer.pushChunk(&c)
+            trace.dropped = c.dropped
+            trace.mu.Unlock()
+        }
+        assert.Equal(uint32(1), tracer.totalTracesDropped.count)
+    })
 }

 func TestTracerFlush(t *testing.T) {
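To run just the new subtest locally, something along these lines should work from the repository root (assuming a standard Go toolchain and module setup for this repo):

go test ./ddtrace/tracer -run 'TestPushTrace/overload' -v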