Skip to content

Commit 5f1aefc

Browse files
authored
refactor(profiling): add debug check that the GIL is held to memalloc (#11860)
The allocation profiler functions should (in theory) only be called when allocating Python objects, which should (in theory) only be done with the GIL held. We have reason to believe that this code is sometimes reached without the GIL held, since some crashes in the code seem to go away with our own locking. Add a debug flag, `_DD_PROFILING_MEMALLOC_CRASH_ON_NO_GIL`, that when set will make the profiler crash if it detects the GIL is not held in places where we think it ought to be.
1 parent 268d3f4 commit 5f1aefc

File tree

3 files changed

+44
-22
lines changed

3 files changed

+44
-22
lines changed

ddtrace/profiling/collector/_memalloc.c

+17-11
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#define PY_SSIZE_T_CLEAN
66
#include <Python.h>
77

8+
#include "_memalloc_debug.h"
89
#include "_memalloc_heap.h"
910
#include "_memalloc_reentrant.h"
1011
#include "_memalloc_tb.h"
@@ -48,7 +49,13 @@ static PyObject* object_string = NULL;
4849
// We add an option here to _add_ a crash, in order to observe this condition in a future diagnostic iteration.
4950
// **This option is _intended_ to crash the Python process** do not use without a good reason!
5051
static char g_crash_on_mutex_pass_str[] = "_DD_PROFILING_MEMALLOC_CRASH_ON_MUTEX_PASS";
51-
static const char* g_truthy_values[] = { "1", "true", "yes", "on", "enable", "enabled", NULL }; // NB the sentinel NULL
52+
// The allocation profiler functions should (in theory) only be called when allocating Python
53+
// objects, which should (in theory) only be done with the GIL held. We have reason to believe
54+
// that this code is sometimes reached without the GIL held, since some crashes in the code
55+
// seem to go away with our own locking. This debug flag will make the profiler crash if
56+
// it detects the GIL is not held in places where we think it ought to be.
57+
static char g_crash_on_no_gil_str[] = "_DD_PROFILING_MEMALLOC_CRASH_ON_NO_GIL";
58+
static bool g_crash_on_no_gil = false;
5259
static memlock_t g_memalloc_lock;
5360

5461
static alloc_tracker_t* global_alloc_tracker;
@@ -92,25 +99,24 @@ static void
9299
memalloc_init()
93100
{
94101
// Check if we should crash the process on mutex pass
95-
char* crash_on_mutex_pass_str = getenv(g_crash_on_mutex_pass_str);
96-
bool crash_on_mutex_pass = false;
97-
if (crash_on_mutex_pass_str) {
98-
for (int i = 0; g_truthy_values[i]; i++) {
99-
if (strcmp(crash_on_mutex_pass_str, g_truthy_values[i]) == 0) {
100-
crash_on_mutex_pass = true;
101-
break;
102-
}
103-
}
104-
}
102+
bool crash_on_mutex_pass = memalloc_get_bool_env(g_crash_on_mutex_pass_str);
105103
memlock_init(&g_memalloc_lock, crash_on_mutex_pass);
106104
#ifndef _WIN32
107105
pthread_atfork(memalloc_prefork, memalloc_postfork_parent, memalloc_postfork_child);
108106
#endif
107+
108+
g_crash_on_no_gil = memalloc_get_bool_env(g_crash_on_no_gil_str);
109109
}
110110

111111
static void
112112
memalloc_add_event(memalloc_context_t* ctx, void* ptr, size_t size)
113113
{
114+
if (g_crash_on_no_gil && !PyGILState_Check()) {
115+
int* p = NULL;
116+
*p = 0;
117+
abort(); // should never reach here
118+
}
119+
114120
uint64_t alloc_count = atomic_add_clamped(&global_alloc_tracker->alloc_count, 1, ALLOC_TRACKER_MAX_COUNT);
115121

116122
/* Return if we've reached the maximum number of allocations */
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#ifndef _DDTRACE_MEMALLOC_DEBUG_H
2+
#define _DDTRACE_MEMALLOC_DEBUG_H
3+
4+
#include <stdbool.h>
5+
#include <stdlib.h>
6+
#include <string.h>
7+
8+
static const char* g_truthy_values[] = { "1", "true", "yes", "on", "enable", "enabled", NULL }; // NB the sentinel NULL
9+
10+
static bool
11+
memalloc_get_bool_env(char* key)
12+
{
13+
char* val = getenv(key);
14+
if (!val) {
15+
return false;
16+
}
17+
for (int i = 0; g_truthy_values[i]; i++) {
18+
if (strcmp(val, g_truthy_values[i]) == 0) {
19+
return true;
20+
}
21+
}
22+
return false;
23+
}
24+
25+
#endif

ddtrace/profiling/collector/_memalloc_heap.c

+2-11
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include <stdlib.h>
33

44
#define PY_SSIZE_T_CLEAN
5+
#include "_memalloc_debug.h"
56
#include "_memalloc_heap.h"
67
#include "_memalloc_reentrant.h"
78
#include "_memalloc_tb.h"
@@ -27,7 +28,6 @@ typedef struct
2728
} heap_tracker_t;
2829

2930
static char g_crash_on_mutex_pass_str[] = "_DD_PROFILING_MEMHEAP_CRASH_ON_MUTEX_PASS";
30-
static const char* g_truthy_values[] = { "1", "true", "yes", "on", "enable", "enabled", NULL }; // NB the sentinel NULL
3131
static memlock_t g_memheap_lock;
3232

3333
static heap_tracker_t global_heap_tracker;
@@ -68,16 +68,7 @@ static void
6868
memheap_init()
6969
{
7070
// Check if we should crash the process on mutex pass
71-
char* crash_on_mutex_pass_str = getenv(g_crash_on_mutex_pass_str);
72-
bool crash_on_mutex_pass = false;
73-
if (crash_on_mutex_pass_str) {
74-
for (int i = 0; g_truthy_values[i]; i++) {
75-
if (strcmp(crash_on_mutex_pass_str, g_truthy_values[i]) == 0) {
76-
crash_on_mutex_pass = true;
77-
break;
78-
}
79-
}
80-
}
71+
bool crash_on_mutex_pass = memalloc_get_bool_env(g_crash_on_mutex_pass_str);
8172
memlock_init(&g_memheap_lock, crash_on_mutex_pass);
8273
#ifndef _WIN32
8374
pthread_atfork(memheap_prefork, memheap_postfork_parent, memheap_postfork_child);

0 commit comments

Comments
 (0)