From f289b0d6889e7cbd2ebac62aab0bcbf77cee3028 Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Thu, 2 Jul 2026 12:21:58 -0700 Subject: [PATCH 1/7] Specify attr! leaf on some ractor.rb functions These are all simple getters which don't allocate or raise exceptions. We can declare them as a leaf and skip pushing frames inside YJIT/ZJIT. --- ractor.rb | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ractor.rb b/ractor.rb index 366a1192c818d2..1168ed7803599d 100644 --- a/ractor.rb +++ b/ractor.rb @@ -242,6 +242,7 @@ def self.new(*args, name: nil, &block) # # Ractor.current #=> # def self.current + Primitive.attr! :leaf __builtin_cexpr! %q{ rb_ractor_self(rb_ec_ractor_ptr(ec)); } @@ -256,6 +257,7 @@ def self.current # r.join # wait for r's termination # Ractor.count #=> 1 def self.count + Primitive.attr! :leaf __builtin_cexpr! %q{ ULONG2NUM(GET_VM()->ractor.cnt); } @@ -385,6 +387,7 @@ def inspect # Returns the name set in Ractor.new, or +nil+. def name + Primitive.attr! :leaf __builtin_cexpr! %q{RACTOR_PTR(self)->name} end @@ -517,6 +520,7 @@ def self.store_if_absent(sym) # Returns the main ractor. def self.main + Primitive.attr! :leaf __builtin_cexpr! %q{ rb_ractor_self(GET_VM()->ractor.main_ractor); } @@ -524,6 +528,7 @@ def self.main # Returns true if the current ractor is the main ractor. def self.main? + Primitive.attr! :leaf __builtin_cexpr! %q{ RBOOL(GET_VM()->ractor.main_ractor == rb_ec_ractor_ptr(ec)) } @@ -564,6 +569,7 @@ def require feature # :nodoc: -- otherwise RDoc outputs it as a class method # Returns the default port of the Ractor. # def default_port + Primitive.attr! :leaf __builtin_cexpr! %q{ ractor_default_port_value(RACTOR_PTR(self)) } @@ -819,6 +825,7 @@ def close # # Returns whether or not the port is closed. def closed? + Primitive.attr! :leaf __builtin_cexpr! 
%q{ ractor_port_closed_p(ec, self); } From 6819e80a07e7891effbebda47b6383be0acec42e Mon Sep 17 00:00:00 2001 From: Koichi Sasada Date: Thu, 2 Jul 2026 20:32:14 +0000 Subject: [PATCH 2/7] test/unit: optionally verify GC consistency after each test Set RUBY_TEST_GC_VERIFY to call GC.verify_internal_consistency after each test. An integer >= 2 samples once every N tests; any other non-empty value verifies every test. Useful for pinning down which test corrupts the heap. Co-Authored-By: Claude Opus 4.8 (1M context) --- tool/lib/test/unit.rb | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tool/lib/test/unit.rb b/tool/lib/test/unit.rb index 0eb8392179a69b..a66d248ff5a3d2 100644 --- a/tool/lib/test/unit.rb +++ b/tool/lib/test/unit.rb @@ -1677,6 +1677,19 @@ def _run_suite suite, type leakchecker.check("#{inst.class}\##{inst.__name__}") + # Optionally verify GC internal consistency after each test. An + # integer >= 2 samples once every N tests (use a prime so a + # randomized test order samples a different set each run); any other + # non-empty value verifies every test (O(heap) per test, so only + # practical for small runs). + if (interval = ENV["RUBY_TEST_GC_VERIFY"]) + n = interval.to_i + @__gc_verify_tick = (@__gc_verify_tick || 0) + 1 + if n <= 1 || (@__gc_verify_tick % n).zero? + GC.verify_internal_consistency + end + end + _end_method(inst) inst._assertions From 051c9db646e5422e6de16f5259254fe1ffd5d909 Mon Sep 17 00:00:00 2001 From: Koichi Sasada Date: Thu, 2 Jul 2026 20:05:18 +0000 Subject: [PATCH 3/7] coroutine (amd64): annotate fiber switches for ThreadSanitizer ThreadSanitizer cannot follow Ruby's userspace coroutine stack switches, so its per-thread shadow stack leaks across a switch and eventually faults inside libtsan. Create a TSan fiber per coroutine and switch to it at each transfer, mirroring the existing AddressSanitizer annotations. The main context borrows the OS thread's implicit fiber and must not destroy it. 
Only amd64 is annotated here. Co-Authored-By: Claude Opus 4.8 (1M context) --- coroutine/amd64/Context.h | 46 +++++++++++++++++++++++++++++++++++++++ thread_pthread.c | 7 ++++++ 2 files changed, 53 insertions(+) diff --git a/coroutine/amd64/Context.h b/coroutine/amd64/Context.h index 65aa6383044973..0deb9b6dd4fb3e 100644 --- a/coroutine/amd64/Context.h +++ b/coroutine/amd64/Context.h @@ -32,6 +32,22 @@ enum {COROUTINE_REGISTERS = 6}; #include #endif +#if defined(__SANITIZE_THREAD__) + #define COROUTINE_SANITIZE_THREAD +#elif defined(__has_feature) + #if __has_feature(thread_sanitizer) + #define COROUTINE_SANITIZE_THREAD + #endif +#endif + +#if defined(COROUTINE_SANITIZE_THREAD) +/* ThreadSanitizer cannot follow a userspace stack switch on its own: its + * per-OS-thread shadow stack must be handed to the destination fiber on every + * coroutine switch via the fiber API, otherwise it leaks the shadow stack + * across switches and eventually faults inside libtsan. */ +#include <sanitizer/tsan_interface.h> +#endif + struct coroutine_context { void **stack_pointer; @@ -42,12 +58,26 @@ struct coroutine_context void *stack_base; size_t stack_size; #endif + +#if defined(COROUTINE_SANITIZE_THREAD) + void *tsan_fiber; + /* Whether we created tsan_fiber (via __tsan_create_fiber, must be + * destroyed) or borrowed it from __tsan_get_current_fiber (the OS thread's + * implicit fiber, owned by TSan; must not be destroyed). */ + int tsan_fiber_owned; +#endif }; typedef COROUTINE(* coroutine_start)(struct coroutine_context *from, struct coroutine_context *self); static inline void coroutine_initialize_main(struct coroutine_context * context) { context->stack_pointer = NULL; + +#if defined(COROUTINE_SANITIZE_THREAD) + /* The OS thread's implicit (already running) fiber, owned by TSan. 
*/ + context->tsan_fiber = __tsan_get_current_fiber(); + context->tsan_fiber_owned = 0; +#endif } static inline void coroutine_initialize( @@ -64,6 +94,11 @@ static inline void coroutine_initialize( context->stack_size = size; #endif +#if defined(COROUTINE_SANITIZE_THREAD) + context->tsan_fiber = __tsan_create_fiber(0); + context->tsan_fiber_owned = 1; +#endif + // Stack grows down. Force 16-byte alignment. char * top = (char*)stack + size; context->stack_pointer = (void**)((uintptr_t)top & ~0xF); @@ -80,6 +115,17 @@ struct coroutine_context * coroutine_transfer(struct coroutine_context * current static inline void coroutine_destroy(struct coroutine_context * context) { context->stack_pointer = NULL; + +#if defined(COROUTINE_SANITIZE_THREAD) + /* Only destroy fibers we created. The borrowed __tsan_get_current_fiber() + * handle (the OS thread's implicit fiber) is owned by TSan; destroying it + * aborts libtsan (FiberDestroy -> ProcWire CheckFailed). */ + if (context->tsan_fiber && context->tsan_fiber_owned) { + __tsan_destroy_fiber(context->tsan_fiber); + context->tsan_fiber = NULL; + context->tsan_fiber_owned = 0; + } +#endif } #endif /* COROUTINE_AMD64_CONTEXT_H */ diff --git a/thread_pthread.c b/thread_pthread.c index 3214e25560c5b3..f819a0030e2d3b 100644 --- a/thread_pthread.c +++ b/thread_pthread.c @@ -1237,6 +1237,13 @@ coroutine_transfer0(struct coroutine_context *transfer_from, struct coroutine_co __sanitizer_start_switch_fiber(fake_stack, transfer_to->stack_base, transfer_to->stack_size); #endif +#if defined(COROUTINE_SANITIZE_THREAD) + /* Tell TSan we are switching to transfer_to's fiber before the stack + * switch, so its per-thread shadow stack stays bound to the right + * coroutine. 
*/ + __tsan_switch_to_fiber(transfer_to->tsan_fiber, 0); +#endif + RBIMPL_ATTR_MAYBE_UNUSED() struct coroutine_context *returning_from = coroutine_transfer(transfer_from, transfer_to); From b5dc95c6496ac83c7a5f523371f03dd0701ee96a Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Thu, 2 Jul 2026 19:13:33 -0500 Subject: [PATCH 4/7] [DOC] Update Set#to_set documentation Co-authored-by: Jeremy Evans --- set.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/set.c b/set.c index e8dab7b95334a6..c3409afe006111 100644 --- a/set.c +++ b/set.c @@ -677,10 +677,31 @@ set_i_to_a(VALUE set) /* * call-seq: - * to_set(&block) -> self or new_set + * to_set {|element| ... } -> new_set + * to_set -> self or new_set * - * Without a block, if +self+ is an instance of +Set+, returns +self+. - * Otherwise, calls Set.new(self, &block). + * With a block given, creates and returns a new set; + * calls the block with each element of +self+, + * and adds the block's return value to the new set: + * + * set = Set[*0..9] # => Set[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + * set.to_set {|i| i * 2 } # => Set[0, 2, 4, 6, 8, 10, 12, 14, 16, 18] + * + * With no block given, when +self+ is an instance of +Set+, + * returns +self+: + * + * set = Set[*0..9] + * set.to_set + * set.to_set.equal?(set) # => true + * + * With no block given, when +self+ is an instance of a subclass of +Set+, + * returns a \Set object containing the elements of +self+: + * + * class MySet < Set; end + * my_set = MySet[*0..9] # => # + * set = my_set.to_set # => Set[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + * + * Related: see {Methods for Converting}[rdoc-ref:Set@Methods+for+Converting]. 
*/ static VALUE set_i_to_set(VALUE set) From 82979eed5a8be25ea8593ffdf773b61bf7c259ba Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Thu, 2 Jul 2026 19:16:58 -0500 Subject: [PATCH 5/7] [DOC] Update Set assign methods documentation Co-authored-by: Jeremy Evans --- set.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/set.c b/set.c index c3409afe006111..a5619da15c02bc 100644 --- a/set.c +++ b/set.c @@ -2463,15 +2463,14 @@ rb_set_size(VALUE set) * === Methods for Assigning * * - #add (aliased as #<<): - * Adds a given object to the set; returns +self+. + * Adds the given object to +self+; returns +self+. * - #add?: - * If the given object is not an element in the set, - * adds it and returns +self+; otherwise, returns +nil+. + * Like #add, but returns +nil+ if the given object is already in +self+. * - #merge: - * Merges the elements of each given enumerable object to the set; returns +self+. + * Adds the elements of the given enumerables to +self+; returns +self+. * - #replace: - * Replaces the contents of the set with the contents - * of a given enumerable. + * Replaces the contents of +self+ with the contents of the given enumerable; + * returns +self+. * * === Methods for Deleting * * From 677f9f5a2910ad530fe03170239cb0a6997ea023 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Thu, 2 Jul 2026 19:20:53 -0500 Subject: [PATCH 6/7] [DOC] Update Set#size documentation Co-authored-by: Jeremy Evans --- set.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/set.c b/set.c index a5619da15c02bc..ecfdcd343eca3b 100644 --- a/set.c +++ b/set.c @@ -1394,7 +1394,11 @@ set_i_compare_by_identity_p(VALUE set) * call-seq: * size -> integer * - * Returns the number of elements. + * Returns the number of elements in +self+: + * + * Set[*0..9].size # => 10 + * + * Related: see {Methods for Querying}[rdoc-ref:Set@Methods+for+Querying]. 
*/ static VALUE set_i_size(VALUE set) @@ -2439,7 +2443,7 @@ rb_set_size(VALUE set) * - #compare_by_identity?: * Returns whether the set considers only identity * when comparing elements. - * - #length (aliased as #size): + * - #size (aliased as #length): * Returns the count of elements. * - #empty?: * Returns whether the set has no elements. From 354806506c4ee8c073659092e6d95446f75ae7f8 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Thu, 2 Jul 2026 19:25:06 -0500 Subject: [PATCH 7/7] [DOC] Update Set operations methods documentation --- set.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/set.c b/set.c index ecfdcd343eca3b..2827df6c4f298e 100644 --- a/set.c +++ b/set.c @@ -2419,17 +2419,13 @@ rb_set_size(VALUE set) * * === Methods for \Set Operations * - * - #| (aliased as #union and #+): - * Returns a new set containing all elements from +self+ - * and all elements from a given enumerable (no duplicates). * - #& (aliased as #intersection): - * Returns a new set containing all elements common to +self+ - * and a given enumerable. + * Returns a new set containing the intersection of +self+ and the given enumerable. * - #- (aliased as #difference): - * Returns a copy of +self+ with all elements - * in a given enumerable removed. - * - #^: Returns a new set containing all elements from +self+ - * and a given enumerable except those common to both. + * Returns a new set containing the difference of +self+ and the given enumerable. + * - #^: Returns a new set containing the exclusive OR of +self+ and the given enumerable. + * - #| (aliased as #union and #+): + * Returns a new set containing the union of +self+ and the given enumerable. * * === Methods for Comparing *