From 3961f7fb1c2f2b1d170db287d092e8ef9ffe7271 Mon Sep 17 00:00:00 2001 From: Mikayla Toffler <46911781+mtoffl01@users.noreply.github.com> Date: Mon, 23 Dec 2024 14:49:50 -0500 Subject: [PATCH 1/2] chore: rename reportHealthMetrics function (#3040) --- ddtrace/tracer/metrics.go | 4 +++- ddtrace/tracer/metrics_test.go | 2 +- ddtrace/tracer/tracer.go | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ddtrace/tracer/metrics.go b/ddtrace/tracer/metrics.go index 409d8a439a..9cd17a968b 100644 --- a/ddtrace/tracer/metrics.go +++ b/ddtrace/tracer/metrics.go @@ -85,7 +85,9 @@ func (t *tracer) reportRuntimeMetrics(interval time.Duration) { } } -func (t *tracer) reportHealthMetrics(interval time.Duration) { +// reportHealthMetricsAtInterval reports noisy health metrics at the specified interval. +// The periodic reporting ensures metrics are delivered without overwhelming the system or logs. +func (t *tracer) reportHealthMetricsAtInterval(interval time.Duration) { ticker := time.NewTicker(interval) defer ticker.Stop() for { diff --git a/ddtrace/tracer/metrics_test.go b/ddtrace/tracer/metrics_test.go index 64548fce86..16a40e8492 100644 --- a/ddtrace/tracer/metrics_test.go +++ b/ddtrace/tracer/metrics_test.go @@ -43,7 +43,7 @@ func TestReportRuntimeMetrics(t *testing.T) { assert.Contains(calls, "runtime.go.gc_stats.pause_quantiles.75p") } -func TestReportHealthMetrics(t *testing.T) { +func TestReportHealthMetricsAtInterval(t *testing.T) { assert := assert.New(t) var tg statsdtest.TestStatsdClient diff --git a/ddtrace/tracer/tracer.go b/ddtrace/tracer/tracer.go index a345fd6366..25e08b7cb3 100644 --- a/ddtrace/tracer/tracer.go +++ b/ddtrace/tracer/tracer.go @@ -376,7 +376,7 @@ func newTracer(opts ...StartOption) *tracer { t.wg.Add(1) go func() { defer t.wg.Done() - t.reportHealthMetrics(statsInterval) + t.reportHealthMetricsAtInterval(statsInterval) }() t.stats.Start() return t From 4f57a472cc1b5877db967ff5d157231dcfdd82ef Mon Sep 17 00:00:00 2001 From: Mikayla Toffler <46911781+mtoffl01@users.noreply.github.com> Date: Mon, 23 Dec 2024 14:58:04 -0500 Subject: [PATCH 2/2] Report datadog.tracer.abandoned_spans health metric (#3032) --- ddtrace/tracer/abandonedspans.go | 28 ++++++++++---- ddtrace/tracer/abandonedspans_test.go | 55 ++++++++++++++++++++++++++- internal/statsdtest/statsdtest.go | 41 +++++++++++++++++++- 3 files changed, 114 insertions(+), 10 deletions(-) diff --git a/ddtrace/tracer/abandonedspans.go b/ddtrace/tracer/abandonedspans.go index defad41831..9ccf1ca045 100644 --- a/ddtrace/tracer/abandonedspans.go +++ b/ddtrace/tracer/abandonedspans.go @@ -14,6 +14,8 @@ import ( "sync/atomic" "time" + "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext" + "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/log" ) @@ -77,27 +79,36 @@ type abandonedSpanCandidate struct { TraceID, SpanID uint64 Start int64 Finished bool + Integration string } func newAbandonedSpanCandidate(s *span, finished bool) *abandonedSpanCandidate { + var component string + if v, ok := s.Meta[ext.Component]; ok { + component = v + } else { + component = "manual" + } // finished is explicit instead of implicit as s.finished may be not set // at the moment of calling this method. // Also, locking is not required as it's called while the span is already locked or it's // being initialized. - return &abandonedSpanCandidate{ - Name: s.Name, - TraceID: s.TraceID, - SpanID: s.SpanID, - Start: s.Start, - Finished: finished, + c := &abandonedSpanCandidate{ + Name: s.Name, + TraceID: s.TraceID, + SpanID: s.SpanID, + Start: s.Start, + Finished: finished, + Integration: component, } + return c } // String takes a span and returns a human-readable string representing that span. func (s *abandonedSpanCandidate) String() string { age := now() - s.Start a := fmt.Sprintf("%d sec", age/1e9) - return fmt.Sprintf("[name: %s, span_id: %d, trace_id: %d, age: %s],", s.Name, s.SpanID, s.TraceID, a) + return fmt.Sprintf("[name: %s, integration: %s, span_id: %d, trace_id: %d, age: %s],", s.Name, s.Integration, s.SpanID, s.TraceID, a) } type abandonedSpansDebugger struct { @@ -292,6 +303,9 @@ func formatAbandonedSpans(b *bucket[uint64, *abandonedSpanCandidate], interval * if interval != nil && curTime-s.Start < interval.Nanoseconds() { continue } + if t, ok := internal.GetGlobalTracer().(*tracer); ok { + t.statsd.Incr("datadog.tracer.abandoned_spans", []string{"name:" + s.Name, "integration:" + s.Integration}, 1) + } spanCount++ msg := s.String() sb.WriteString(msg) diff --git a/ddtrace/tracer/abandonedspans_test.go b/ddtrace/tracer/abandonedspans_test.go index 745f03f222..a929961df2 100644 --- a/ddtrace/tracer/abandonedspans_test.go +++ b/ddtrace/tracer/abandonedspans_test.go @@ -12,7 +12,9 @@ import ( "testing" "time" + "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext" "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/statsdtest" "gopkg.in/DataDog/dd-trace-go.v1/internal/version" "github.com/stretchr/testify/assert" @@ -58,11 +60,62 @@ func assertProcessedSpans(assert *assert.Assertions, t *tracer, startedSpans, fi func formatSpanString(s *span) string { s.Lock() - msg := fmt.Sprintf("[name: %s, span_id: %d, trace_id: %d, age: %s],", s.Name, s.SpanID, s.TraceID, spanAge(s)) + var integration string + if v, ok := s.Meta[ext.Component]; ok { + integration = v + } else { + integration = "manual" + } + msg := fmt.Sprintf("[name: %s, integration: %s, span_id: %d, trace_id: %d, age: %s],", s.Name, integration, s.SpanID, s.TraceID, spanAge(s)) s.Unlock() return msg } +func TestAbandonedSpansMetric(t *testing.T) { + assert := assert.New(t) + var tg statsdtest.TestStatsdClient + tp := new(log.RecordLogger) + tickerInterval = 100 * time.Millisecond + t.Run("finished", func(t *testing.T) { + tp.Reset() + tg.Reset() + defer setTestTime()() + tracer, _, _, stop := startTestTracer(t, WithLogger(tp), WithDebugSpansMode(500*time.Millisecond), withStatsdClient(&tg)) + defer stop() + s := tracer.StartSpan("operation", StartTime(spanStart)).(*span) + s.Finish() + assertProcessedSpans(assert, tracer, 1, 1) + assert.Empty(tg.GetCallsByName("datadog.tracer.abandoned_spans")) + }) + t.Run("open", func(t *testing.T) { + tp.Reset() + tg.Reset() + defer setTestTime()() + tracer, _, _, stop := startTestTracer(t, WithLogger(tp), WithDebugSpansMode(500*time.Millisecond), withStatsdClient(&tg)) + defer stop() + tracer.StartSpan("operation", StartTime(spanStart), Tag(ext.Component, "some_integration_name")) + assertProcessedSpans(assert, tracer, 1, 0) + calls := tg.GetCallsByName("datadog.tracer.abandoned_spans") + assert.Len(calls, 1) + call := calls[0] + assert.Equal([]string{"name:operation", "integration:some_integration_name"}, call.Tags()) + }) + t.Run("both", func(t *testing.T) { + tp.Reset() + tg.Reset() + defer setTestTime()() + tracer, _, _, stop := startTestTracer(t, WithLogger(tp), WithDebugSpansMode(500*time.Millisecond), withStatsdClient(&tg)) + defer stop() + sf := tracer.StartSpan("op", StartTime(spanStart)).(*span) + sf.Finish() + s := tracer.StartSpan("op2", StartTime(spanStart)).(*span) + assertProcessedSpans(assert, tracer, 2, 1) + calls := tg.GetCallsByName("datadog.tracer.abandoned_spans") + assert.Len(calls, 1) + s.Finish() + }) +} + func TestReportAbandonedSpans(t *testing.T) { assert := assert.New(t) tp := new(log.RecordLogger) diff --git a/internal/statsdtest/statsdtest.go b/internal/statsdtest/statsdtest.go index bed3646fbc..e31cdb4a9f 100644 --- a/internal/statsdtest/statsdtest.go +++ b/internal/statsdtest/statsdtest.go @@ -49,8 +49,16 @@ type TestStatsdCall struct { rate float64 } -func (c *TestStatsdCall) Tags() []string { - return c.tags +func (t TestStatsdCall) Name() string { + return t.name +} + +func (t TestStatsdCall) Tags() []string { + return t.tags +} + +func (t TestStatsdCall) IntVal() int64 { + return t.intVal } func (tg *TestStatsdClient) addCount(name string, value int64) { @@ -225,6 +233,35 @@ func (tg *TestStatsdClient) CallsByName() map[string]int { return counts } +// GetCallsByName returns a slice of TestStatsdCalls with the provided name on the TestStatsdClient +// It's useful if you want to use any TestStatsdCall method calls on the result(s) +func (tg *TestStatsdClient) GetCallsByName(name string) (calls []TestStatsdCall) { + tg.mu.RLock() + defer tg.mu.RUnlock() + for _, c := range tg.gaugeCalls { + if c.Name() == name { + calls = append(calls, c) + } + } + for _, c := range tg.incrCalls { + if c.Name() == name { + calls = append(calls, c) + } + } + for _, c := range tg.countCalls { + if c.Name() == name { + calls = append(calls, c) + } + } + for _, c := range tg.timingCalls { + if c.Name() == name { + calls = append(calls, c) + } + } + return calls +} + +// FilterCallsByName returns a slice of TestStatsdCalls with the provided name, from the list of provided TestStatsdCalls func FilterCallsByName(calls []TestStatsdCall, name string) []TestStatsdCall { var matches []TestStatsdCall for _, c := range calls {