fix(profiling): always report CPU time regardless of Thread running state (#16273)

KowalskiThomas · web-flow · commit 031905b4e237 · 2026-01-30T15:44:06.000+01:00
## Description This PR updates the Python Profiler logic to always report CPU Time for sampled Threads, regardless of whether they were running when we computed their spent CPU time. ### Why? In [Incident 48491](https://app.datadoghq.com/incidents/48491), we investigated an issue where, following a `ddtrace` upgrade, one of our Python services suddenly started reporting near-zero CPU Time and no Frames at all/empty flame graphs. The investigation showed that this changed if we removed the check on `is_running` and always reported CPU Time for each sampled Thread. Originally, this check was introduced to avoid reporting CPU Time on Stacks that were in fact in idle states; however, it also means that if the Thread runs between two Samples but then does not run when we "check whether it is running", then we are blind to what it did during that time. On top of that, there is no reliable way to check whether a Thread is running – checking this is by definition racy, and although there are potential ways around it, we could never guarantee that the running state we determined matched the one that the Thread was in when we captured its Stack, meaning we could have discrepancies anyway. Finally, several other Datadog Profilers have this bias and accept it, so we should probably match that behaviour. **Note** the performance difference will probably be negligible, but this change may actually slightly improve our CPU usage as we will now make one fewer system call.
diff --git a/ddtrace/internal/datadog/profiling/stack/echion/echion/threads.h b/ddtrace/internal/datadog/profiling/stack/echion/echion/threads.h
@@ -54,13 +54,11 @@ class ThreadInfo
     mach_port_t mach_port;
 #endif
     microsecond_t cpu_time;
-    bool running_ = false;
 
     uintptr_t asyncio_loop = 0;
     uintptr_t tstate_addr = 0; // Remote address of PyThreadState for accessing asyncio_tasks_head
 
     [[nodiscard]] Result<void> update_cpu_time();
-    bool is_running();
 
     [[nodiscard]] Result<void> sample(EchionSampler&, int64_t, PyThreadState*, microsecond_t);
     void unwind(EchionSampler&, PyThreadState*);
diff --git a/ddtrace/internal/datadog/profiling/stack/src/echion/threads.cc b/ddtrace/internal/datadog/profiling/stack/src/echion/threads.cc
@@ -584,7 +584,7 @@ ThreadInfo::sample(EchionSampler& echion, int64_t iid, PyThreadState* tstate, mi
         return ErrorKind::CpuTimeError;
     }
 
-    Renderer::get().render_cpu_time(is_running() ? cpu_time - previous_cpu_time : 0);
+    Renderer::get().render_cpu_time(cpu_time - previous_cpu_time);
 
     this->unwind(echion, tstate);
 
@@ -640,29 +640,18 @@ Result<void>
 ThreadInfo::update_cpu_time()
 {
 #if defined PL_LINUX
-    struct timespec ts1;
-    if (clock_gettime(cpu_clock_id, &ts1)) {
+    struct timespec ts;
+    if (clock_gettime(cpu_clock_id, &ts)) {
         // If the clock is invalid, we skip updating the CPU time.
         // This can happen if we try to compute CPU time for a thread that has exited.
         if (errno == EINVAL) {
-            this->running_ = false;
             return Result<void>::ok();
         }
 
         return ErrorKind::CpuTimeError;
     }
 
-    this->cpu_time = TS_TO_MICROSECOND(ts1);
-
-    // Determine if running by checking if CPU time advances between two back-to-back
-    // measurements. This is done here to avoid a separate is_running() call with
-    // its own syscalls (reduces 3 syscalls per thread to 2).
-    struct timespec ts2;
-    if (clock_gettime(cpu_clock_id, &ts2) != 0) {
-        this->running_ = false;
-    } else {
-        this->running_ = (ts1.tv_sec != ts2.tv_sec || ts1.tv_nsec != ts2.tv_nsec);
-    }
+    this->cpu_time = TS_TO_MICROSECOND(ts);
 #elif defined PL_DARWIN
     thread_basic_info_data_t info;
     mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
@@ -673,33 +662,22 @@ ThreadInfo::update_cpu_time()
         // If the thread is invalid, we skip updating the CPU time.
         // This can happen if we try to compute CPU time for a thread that has exited.
         if (kr == KERN_INVALID_ARGUMENT) {
-            this->running_ = false;
             return Result<void>::ok();
         }
 
         return ErrorKind::CpuTimeError;
     }
 
     if (info.flags & TH_FLAGS_IDLE) {
-        this->running_ = false;
         return Result<void>::ok();
     }
 
     this->cpu_time = TV_TO_MICROSECOND(info.user_time) + TV_TO_MICROSECOND(info.system_time);
-    // On macOS, thread_info already gives us run_state, so no need to check if the clock is advancing
-    this->running_ = (info.run_state == TH_STATE_RUNNING);
 #endif
 
     return Result<void>::ok();
 }
 
-bool
-ThreadInfo::is_running()
-{
-    // Running state is computed in update_cpu_time by taking two back-to-back measurements of the CPU time.
-    return this->running_;
-}
-
 void
 for_each_thread(EchionSampler& echion, InterpreterInfo& interp, PyThreadStateCallback callback)
 {
diff --git a/releasenotes/notes/profiling-always-report-cpu-time-5f9aa07e648801fc.yaml b/releasenotes/notes/profiling-always-report-cpu-time-5f9aa07e648801fc.yaml
@@ -0,0 +1,4 @@
+fixes:
+  - |
+    profiling: The Profiler now always reports CPU time for threads, regardless of whether they are running when
+    the sample is captured.

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +fixes:
 +  - |
 +    profiling: The Profiler now always reports CPU time for threads, regardless of whether they are running when
 +    the sample is captured.