Skip to content

Commit

Permalink
Pin old content holder while creating {str/obj}buf
Browse files Browse the repository at this point in the history
Fixed a problem in rb_mmtk_ary_new_objbuf_copy where GC is triggered
when creating imemo:mmtk_objbuf, and the `src` argument is not
forwarded.  We now pin the object `src` points into (if any).

The same fix is applied for rb_mmtk_str_new_strbuf_copy.
  • Loading branch information
wks committed Nov 23, 2023
1 parent b4cc207 commit f58ec50
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 20 deletions.
35 changes: 24 additions & 11 deletions array.c
Original file line number Diff line number Diff line change
Expand Up @@ -245,21 +245,28 @@ rb_mmtk_ary_copy_objbuf(VALUE dst_ary, VALUE src_ary)
rb_mmtk_ary_set_objbuf(dst_ary, RARRAY_EXT(src_ary)->objbuf);
}

// Attach a heap array with a newly allocated imemo:mmtk_objbuf and copy over the contents
// Note that `ary` may be an existing array and `src` may point into `ary` or its existing
// buffer. Do not modify `ary` until the new strbuf is fully written.
// Attach a heap array `ary` with a newly allocated imemo:mmtk_objbuf of the given capacity `capa`.
// The frst `copy_len` elements of the new objbuf is copied from `src`, and `copy_len` must not
// exceed `capa`.
//
// `src` may point to an element of another heap object, in which case `src_obj` must point to the
// object into which `src` is pointed, and `src_obj` will be pinned during the execution of this
// function. If `src` does not point into another heap object, `src_obj` may be 0.
//
// Note: capa is the number of elements in the newly created buffer.
// copy_len is the number of elements to copy, not the number of bytes.
static inline void
rb_mmtk_ary_new_objbuf_copy(VALUE ary, size_t capa, const VALUE *src, size_t copy_len)
rb_mmtk_ary_new_objbuf_copy(VALUE ary, size_t capa, VALUE src_obj, const VALUE *src, size_t copy_len)
{
RUBY_ASSERT(rb_mmtk_enabled_p());

// When using MMTk, as.heap.ptr points to the ary field of a rb_mmtk_objbuf_t
// which is allocated in the heap as an imemo:mmtk_objbuf.
rb_mmtk_objbuf_t *objbuf = rb_mmtk_new_objbuf(capa);
rb_mmtk_objbuf_t *objbuf = rb_mmtk_new_objbuf(capa); // This may trigger GC, causing objects to be moved.
VALUE *elems = rb_mmtk_objbuf_to_elems(objbuf);

// Note that `ary` may be an existing array and `src` may point into `ary` or its existing
// buffer. Do not modify `ary` until the new strbuf is fully written.
if (src != NULL) {
RUBY_ASSERT(capa >= copy_len);
for (size_t i = 0; i < copy_len; i++) {
Expand All @@ -270,13 +277,16 @@ rb_mmtk_ary_new_objbuf_copy(VALUE ary, size_t capa, const VALUE *src, size_t cop

RARRAY(ary)->as.heap.ptr = elems;
rb_mmtk_ary_set_objbuf(ary, (VALUE)objbuf);

// Keep `src_obj` alive and pinned until the function exits.
RB_GC_GUARD(src_obj);
}

// Attach a heap array with a newly allocated empty imemo:mmtk_objbuf.
static inline void
rb_mmtk_ary_new_objbuf(VALUE ary, size_t capa)
{
rb_mmtk_ary_new_objbuf_copy(ary, capa, NULL, 0);
rb_mmtk_ary_new_objbuf_copy(ary, capa, 0, NULL, 0);
}

#endif
Expand Down Expand Up @@ -459,7 +469,7 @@ ary_heap_realloc(VALUE ary, size_t new_capa)
} else {
size_t old_capa = ARY_HEAP_CAPA(ary);
size_t copy_len = new_capa < old_capa ? new_capa : old_capa;
rb_mmtk_ary_new_objbuf_copy(ary, new_capa, ARY_HEAP_PTR(ary), copy_len);
rb_mmtk_ary_new_objbuf_copy(ary, new_capa, RARRAY_EXT(ary)->objbuf, ARY_HEAP_PTR(ary), copy_len);
}
#endif

Expand Down Expand Up @@ -506,7 +516,7 @@ ary_resize_capa(VALUE ary, long capacity)
ARY_SET_PTR(ary, ptr);
#if USE_MMTK
} else {
rb_mmtk_ary_new_objbuf_copy(ary, capacity, ARY_EMBED_PTR(ary), len);
rb_mmtk_ary_new_objbuf_copy(ary, capacity, ary, ARY_EMBED_PTR(ary), len);
FL_UNSET_EMBED(ary);
}
#endif
Expand Down Expand Up @@ -673,7 +683,7 @@ rb_ary_cancel_sharing(VALUE ary)
ARY_SET_PTR(ary, ptr);
#if USE_MMTK
} else {
rb_mmtk_ary_new_objbuf_copy(ary, len, ARY_HEAP_PTR(ary), len);
rb_mmtk_ary_new_objbuf_copy(ary, len, RARRAY_EXT(ary)->objbuf, ARY_HEAP_PTR(ary), len);
}
#endif
rb_ary_unshare(ary);
Expand Down Expand Up @@ -1115,7 +1125,10 @@ ary_make_shared(VALUE ary)
ARY_SET_PTR(ary, ptr);
#if USE_MMTK
} else {
rb_mmtk_ary_new_objbuf_copy(shared, capa, RARRAY_CONST_PTR(ary), len);
rb_mmtk_ary_new_objbuf_copy(shared, capa,
rb_mmtk_array_content_holder(ary),
RARRAY_CONST_PTR(ary),
len);
FL_UNSET_EMBED(ary);
ARY_SET_PTR(ary, ARY_HEAP_PTR(shared));
rb_mmtk_ary_copy_objbuf(ary, shared);
Expand Down Expand Up @@ -4799,7 +4812,7 @@ rb_ary_replace(VALUE copy, VALUE orig)
ary_memcpy(copy, 0, len, RARRAY_CONST_PTR(orig));
#if USE_MMTK
} else {
rb_mmtk_ary_new_objbuf_copy(copy, len, ARY_EMBED_PTR(orig), len);
rb_mmtk_ary_new_objbuf_copy(copy, len, orig, ARY_EMBED_PTR(orig), len);
FL_UNSET_EMBED(copy);
ARY_SET_LEN(copy, len);
ARY_SET_CAPA(copy, len);
Expand Down
30 changes: 28 additions & 2 deletions include/ruby/internal/core/rarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -314,12 +314,32 @@ rb_array_const_ptr(VALUE a)
}
}

#if USE_MMTK
/**
* @private
*
* This is an implementation detail of #RARRAY_PTR_USE. People do not use it
* directly.
* Return the object that holds the content of the array.
* Only relevant when using MMTk.
* For embedded arrays, it is the array itself;
* for heap arrays, it is the the underlying imemo:mmtk_objbuf
*
* @param[in] a An object of ::RArray.
* @return The object holding its backend storage.
*/
static inline VALUE
rb_mmtk_array_content_holder(VALUE a)
{
RBIMPL_ASSERT_TYPE(a, RUBY_T_ARRAY);

if (RB_FL_ANY_RAW(a, RARRAY_EMBED_FLAG)) {
return a;
}
else {
return RARRAY_EXT(a)->objbuf;
}
}
#endif

#if USE_MMTK
// Defined in mmtk_support.c
bool rb_mmtk_enabled_p(void);
Expand All @@ -343,6 +363,12 @@ void rb_mmtk_pin_array_buffer(VALUE array, volatile VALUE *stack_slot);
rb_mmtk_impl_pinned = 0; \
} while (0)
#else
/**
* @private
*
* This is an implementation detail of #RARRAY_PTR_USE. People do not use it
* directly.
*/
#define RBIMPL_RARRAY_STMT(ary, var, expr) do { \
RBIMPL_ASSERT_TYPE((ary), RUBY_T_ARRAY); \
const VALUE rbimpl_ary = (ary); \
Expand Down
52 changes: 45 additions & 7 deletions string.c
Original file line number Diff line number Diff line change
Expand Up @@ -209,40 +209,65 @@ str_enc_fastpath(VALUE str)
static inline long str_embed_capa(VALUE str);
static inline int str_dependent_p(VALUE str);

static inline VALUE
// Return the object that actually holds the content of `str`.
// For embedded strings, it is the string itself;
// For heap strings, it is the backing strbuf.
rb_mmtk_string_content_holder(VALUE str)
{
RBIMPL_ASSERT_TYPE(str, RUBY_T_STRING);

if (STR_EMBED_P(str)) {
return str;
}
else {
return RSTRING_EXT(str)->strbuf;
}
}

void
rb_mmtk_str_set_strbuf(VALUE str, VALUE strbuf)
{
RUBY_ASSERT(rb_mmtk_enabled_p());
RB_OBJ_WRITE(str, RSTRING_EXT(str), strbuf);
}

// Attach a heap string with a newly allocated imemo:mmtk_strbuf and memcpy the given content.
// Note that `str` may be an existing string and `src` may point into `str` or its existing
// buffer. Do not modify `str` until the new strbuf is fully written.
// Attach a heap string `str` with a newly allocated imemo:mmtk_strbuf of a given capacity `capa`.
// The first `copy_size` bytes of the new buffer is copied from `src`, and `copy_size` must not
// exceed `capa`.
//
// `src` may point to an element of another heap object, in which case `src_obj` must point to the
// object into which `src` is pointed, and `src_obj` will be pinned during the execution of this
// function. If `src` does not point into another heap object, `src_obj` may be 0.
static inline void
rb_mmtk_str_new_strbuf_copy(VALUE str, size_t capa, const char *src, size_t copy_size)
rb_mmtk_str_new_strbuf_copy(VALUE str, size_t capa, VALUE src_obj, const char *src, size_t copy_size)
{
RUBY_ASSERT(rb_mmtk_enabled_p());

// When using MMTk, as.heap.ptr points to the ary field of a rb_mmtk_strbuf_t
// which is allocated in the heap as an imemo:mmtk_strbuf.
rb_mmtk_strbuf_t *strbuf = rb_mmtk_new_strbuf(capa);
rb_mmtk_strbuf_t *strbuf = rb_mmtk_new_strbuf(capa); // This may trigger GC, causing objects to be moved.
char *chars = rb_mmtk_strbuf_to_chars(strbuf);

// Note that `str` may be an existing string and `src` may point into `str` or its existing
// buffer. Do not modify `str` until the new strbuf is fully written.
if (src != NULL) {
RUBY_ASSERT(capa >= copy_size);
memcpy(chars, src, copy_size);
}

RSTRING(str)->as.heap.ptr = chars;
rb_mmtk_str_set_strbuf(str, (VALUE)strbuf);

// Keep `src_obj` alive and pinned until the function exits.
RB_GC_GUARD(src_obj);
}

// Attach a heap string with a newly allocated empty imemo:mmtk_strbuf.
static inline void
rb_mmtk_str_new_strbuf(VALUE str, size_t capa)
{
rb_mmtk_str_new_strbuf_copy(str, capa, NULL, 0);
rb_mmtk_str_new_strbuf_copy(str, capa, 0, NULL, 0);
}

static inline void
Expand All @@ -253,7 +278,12 @@ rb_mmtk_resize_capa_term(VALUE str, size_t capacity, size_t termlen)
if (STR_EMBED_P(str)) {
if ((size_t)str_embed_capa(str) < capacity + termlen) {
const long tlen = RSTRING_LEN(str);
rb_mmtk_str_new_strbuf_copy(str, (size_t)(capacity) + (termlen), RSTRING_PTR(str), tlen);
rb_mmtk_str_new_strbuf_copy(
str,
(size_t)(capacity) + (termlen),
str,
RSTRING_PTR(str),
tlen);
RSTRING(str)->len = tlen;
STR_SET_NOEMBED(str);
RSTRING(str)->as.heap.aux.capa = (capacity);
Expand All @@ -264,6 +294,7 @@ rb_mmtk_resize_capa_term(VALUE str, size_t capacity, size_t termlen)
rb_mmtk_str_new_strbuf_copy(
str,
(size_t)(capacity) + (termlen),
RSTRING_EXT(str)->strbuf,
RSTRING(str)->as.heap.ptr,
STR_HEAP_SIZE(str));
RSTRING(str)->as.heap.aux.capa = (capacity);
Expand All @@ -285,6 +316,7 @@ rb_mmtk_str_sized_realloc_n(VALUE str, size_t new_size)
rb_mmtk_str_new_strbuf_copy(
str,
new_size,
RSTRING_EXT(str)->strbuf,
RSTRING(str)->as.heap.ptr,
copy_size);
RSTRING(str)->as.heap.aux.capa = new_size;
Expand Down Expand Up @@ -1855,6 +1887,7 @@ str_shared_replace(VALUE str, VALUE str2)
rb_mmtk_str_new_strbuf_copy(
str2,
len + termlen,
str2,
RSTRING(str2)->as.embed.ary,
len + termlen);
}
Expand Down Expand Up @@ -2148,6 +2181,7 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
rb_mmtk_str_new_strbuf_copy(
str,
(size_t)capa + termlen,
str,
RSTRING(str)->as.embed.ary,
RSTRING_LEN(str) + 1);
}
Expand All @@ -2168,6 +2202,7 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
rb_mmtk_str_new_strbuf_copy(
str,
(size_t)capa + termlen,
RSTRING_EXT(str)->strbuf,
old_ptr,
osize < size ? osize : size);
}
Expand Down Expand Up @@ -2799,6 +2834,7 @@ str_make_independent_expand(VALUE str, long len, long expand, const int termlen)
rb_mmtk_str_new_strbuf_copy(
str,
(size_t)capa + termlen,
rb_mmtk_string_content_holder(str),
oldptr,
len);
ptr = RSTRING(str)->as.heap.ptr;
Expand Down Expand Up @@ -8436,6 +8472,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
rb_mmtk_str_new_strbuf_copy(
str,
max + termlen,
0,
(char *)buf,
max + termlen);
}
Expand Down Expand Up @@ -8527,6 +8564,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
rb_mmtk_str_new_strbuf_copy(
str,
max + termlen,
0,
(char *)buf,
max + termlen);
}
Expand Down

0 comments on commit f58ec50

Please sign in to comment.