From 29be3ef88c0e4e093c6563f50237c37d5ab14a39 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 17 Nov 2023 08:21:50 -0500 Subject: [PATCH] Move heap_size batching code into pair of functions (#51611) Co-authored-by: Gabriel Baraldi --- src/gc.c | 126 ++++++++++++++++------------------------------- src/staticdata.c | 11 ++--- 2 files changed, 47 insertions(+), 90 deletions(-) diff --git a/src/gc.c b/src/gc.c index c68d4e92f9f57..bc0361c8ac7a8 100644 --- a/src/gc.c +++ b/src/gc.c @@ -700,6 +700,7 @@ static uint64_t gc_end_time = 0; static int thrash_counter = 0; static int thrashing = 0; // global variables for GC stats +static uint64_t freed_in_runtime = 0; // Resetting the object to a young object, this is used when marking the // finalizer list to collect them the next time because the object is very @@ -1005,6 +1006,22 @@ static void sweep_weak_refs(void) } +STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT +{ + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc) + sz; + if (alloc_acc < 16*1024) + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc); + else { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc); + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); + } +} + +STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT +{ + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_num.free_acc) + sz); +} + // big value list // Size includes the tag and the tag is not cleared!! 
@@ -1027,13 +1044,7 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); jl_atomic_store_relaxed(&ptls->gc_num.bigalloc, jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1); - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); - if (alloc_acc + allocsz < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + allocsz); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + allocsz); - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); - } + jl_batch_accum_heap_size(ptls, allocsz); #ifdef MEMDEBUG memset(v, 0xee, allocsz); #endif @@ -1147,16 +1158,10 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT jl_ptls_t ptls = jl_current_task->ptls; jl_atomic_store_relaxed(&ptls->gc_num.allocd, jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); - if (alloc_acc + sz < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + sz); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + sz); - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); - } + jl_batch_accum_heap_size(ptls, sz); } - -static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT +// Only safe to update the heap inside the GC +static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT { int gc_n_threads; jl_ptls_t* gc_all_tls_states; @@ -1170,12 +1175,14 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT dest->realloc += jl_atomic_load_relaxed(&ptls->gc_num.realloc); dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_num.poolalloc); dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc); - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); - uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc); dest->freed += 
jl_atomic_load_relaxed(&ptls->gc_num.free_acc); - jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc - free_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); + if (update_heap) { + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); + freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_num.free_acc); + jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); + } } } } @@ -1209,7 +1216,7 @@ static int64_t inc_live_bytes(int64_t inc) JL_NOTSAFEPOINT void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT { - combine_thread_gc_counts(&gc_num); + combine_thread_gc_counts(&gc_num, 0); inc_live_bytes(gc_num.deferred_alloc + gc_num.allocd); gc_num.allocd = 0; gc_num.deferred_alloc = 0; @@ -3176,7 +3183,7 @@ JL_DLLEXPORT int jl_gc_is_enabled(void) JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT { jl_gc_num_t num = gc_num; - combine_thread_gc_counts(&num); + combine_thread_gc_counts(&num, 0); // Sync this logic with `base/util.jl:GC_Diff` *bytes = (num.total_allocd + num.deferred_alloc + num.allocd); } @@ -3189,7 +3196,7 @@ JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { jl_gc_num_t num = gc_num; - combine_thread_gc_counts(&num); + combine_thread_gc_counts(&num, 0); return num; } @@ -3248,7 +3255,7 @@ size_t jl_maxrss(void); // Only one thread should be running in this function static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) { - combine_thread_gc_counts(&gc_num); + combine_thread_gc_counts(&gc_num, 1); // We separate the update of the graph from the update of live_bytes here // so that the sweep shows a downward trend in memory usage. 
@@ -3432,6 +3439,8 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) gc_num.last_incremental_sweep = gc_end_time; } + jl_atomic_store_relaxed(&gc_heap_stats.heap_size, jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - freed_in_runtime); + freed_in_runtime = 0; size_t heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size); double target_allocs = 0.0; double min_interval = default_collect_interval; @@ -3780,13 +3789,7 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); - if (alloc_acc + sz < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + sz); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + sz); - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); - } + jl_batch_accum_heap_size(ptls, sz); } return data; } @@ -3803,13 +3806,7 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); - if (alloc_acc + sz < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + sz * nm); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + sz * nm); - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); - } + jl_batch_accum_heap_size(ptls, sz * nm); } return data; } @@ -3820,14 +3817,7 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) jl_task_t *ct = jl_current_task; free(p); if (pgcstack != NULL && ct->world_age) { - jl_ptls_t ptls = ct->ptls; - uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc); - if (free_acc + sz < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 
free_acc + sz); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + sz)); - jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); - } + jl_batch_accum_free_size(ct->ptls, sz); } } @@ -3847,23 +3837,10 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size int64_t diff = sz - old; if (diff < 0) { - diff = -diff; - uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc); - if (free_acc + diff < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + diff); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + diff)); - jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); - } + jl_batch_accum_free_size(ptls, -diff); } else { - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); - if (alloc_acc + diff < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + diff); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + diff); - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); - } + jl_batch_accum_heap_size(ptls, diff); } } return data; @@ -3948,13 +3925,7 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); - if (alloc_acc + allocsz < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + allocsz); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + allocsz); - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); - } + jl_batch_accum_heap_size(ptls, allocsz); #ifdef _OS_WINDOWS_ SetLastError(last_error); #endif @@ -4002,23 +3973,10 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds int64_t diff = allocsz - oldsz; if (diff < 0) { - diff = -diff; - uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc); - 
if (free_acc + diff < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + diff); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + diff)); - jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); - } + jl_batch_accum_free_size(ptls, -diff); } else { - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); - if (alloc_acc + diff < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + diff); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + diff); - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); - } + jl_batch_accum_heap_size(ptls, diff); } if (allocsz > oldsz) { maybe_record_alloc_to_profile((jl_value_t*)b, allocsz - oldsz, (jl_datatype_t*)jl_buff_tag); diff --git a/src/staticdata.c b/src/staticdata.c index f34f64b28e321..42a5fdb63b63e 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -71,7 +71,6 @@ External links: */ #include <stdlib.h> #include <string.h> -#include <stdbool.h> #include <stdio.h> // printf #include <inttypes.h> // PRIxPTR @@ -3522,7 +3521,7 @@ static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_ } // TODO?: refactor to make it easier to create the "package inspector" -static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo, const char *pkgname, bool needs_permalloc) +static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo, const char *pkgname, int needs_permalloc) { JL_TIMING(LOAD_IMAGE, LOAD_Pkgimg); jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, pkgname); @@ -3548,7 +3547,7 @@ static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, i JL_SIGATOMIC_BEGIN(); size_t len = dataendpos - datastartpos; char *sysimg; - bool success = !needs_permalloc; + int success = !needs_permalloc; ios_seek(f, datastartpos); if (needs_permalloc) sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 
0); @@ -3608,7 +3607,7 @@ static void jl_restore_system_image_from_stream(ios_t *f, jl_image_t *image, uin jl_restore_system_image_from_stream_(f, image, NULL, checksum | ((uint64_t)0xfdfcfbfa << 32), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); } -JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(void* pkgimage_handle, const char *buf, jl_image_t *image, size_t sz, jl_array_t *depmods, int completeinfo, const char *pkgname, bool needs_permalloc) +JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(void* pkgimage_handle, const char *buf, jl_image_t *image, size_t sz, jl_array_t *depmods, int completeinfo, const char *pkgname, int needs_permalloc) { ios_t f; ios_static_buffer(&f, (char*)buf, sz); @@ -3625,7 +3624,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *d "Cache file \"%s\" not found.\n", fname); } jl_image_t pkgimage = {}; - jl_value_t *ret = jl_restore_package_image_from_stream(NULL, &f, &pkgimage, depmods, completeinfo, pkgname, true); + jl_value_t *ret = jl_restore_package_image_from_stream(NULL, &f, &pkgimage, depmods, completeinfo, pkgname, 1); ios_close(&f); return ret; } @@ -3700,7 +3699,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j memset(&pkgimage.fptrs, 0, sizeof(pkgimage.fptrs)); } - jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_handle, pkgimg_data, &pkgimage, *plen, depmods, completeinfo, pkgname, false); + jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_handle, pkgimg_data, &pkgimage, *plen, depmods, completeinfo, pkgname, 0); return mod; }