From 8eab3172788ac1f2760d7df46e07e5b0ae7e7ea6 Mon Sep 17 00:00:00 2001
From: John Hawthorn
Date: Tue, 24 Feb 2026 16:30:43 -0800
Subject: [PATCH 1/3] Convert cc_refinement_table into TypedData object

This converts the cc_refinement_table in the VM into a standard TypedData
object, using rb_gc_declare_weak_references so that its entries are treated
as weak references. This allows us to store all the refinements in a flat
array and simply remove them as they are GC'd; we don't need a set for
efficient deletes.

This should also allow us to assume that imemo_callcache never requires
additional cleanup.
---
 gc.c        |  37 --------------
 gc/gc.h     |   1 -
 method.h    |   1 -
 vm.c        |  11 ++---
 vm_core.h   |   2 +-
 vm_method.c | 138 ++++++++++++++++++++++++++++++++++++----------------
 6 files changed, 102 insertions(+), 88 deletions(-)

diff --git a/gc.c b/gc.c
index 1af2ba41e7bf73..bc5a523f711b5c 100644
--- a/gc.c
+++ b/gc.c
@@ -2270,15 +2270,6 @@ rb_gc_obj_free_vm_weak_references(VALUE obj)
         break;
       case T_IMEMO:
         switch (imemo_type(obj)) {
-          case imemo_callcache: {
-            const struct rb_callcache *cc = (const struct rb_callcache *)obj;
-
-            if (vm_cc_refinement_p(cc)) {
-                rb_vm_delete_cc_refinement(cc);
-            }
-
-            break;
-          }
           case imemo_callinfo:
             rb_vm_ci_free((const struct rb_callinfo *)obj);
             break;
@@ -4062,23 +4053,6 @@ vm_weak_table_foreach_update_weak_key(st_data_t *key, st_data_t *value, st_data_
     return ret;
 }
 
-static int
-vm_weak_table_cc_refinement_foreach(st_data_t key, st_data_t data, int error)
-{
-    struct global_vm_table_foreach_data *iter_data = (struct global_vm_table_foreach_data *)data;
-
-    return iter_data->callback((VALUE)key, iter_data->data);
-}
-
-static int
-vm_weak_table_cc_refinement_foreach_update_update(st_data_t *key, st_data_t data, int existing)
-{
-    struct global_vm_table_foreach_data *iter_data = (struct global_vm_table_foreach_data *)data;
-
-    return iter_data->update_callback((VALUE *)key, iter_data->data);
-}
-
-
 static int
 vm_weak_table_sym_set_foreach(VALUE *sym_ptr, void *data)
 {
@@ -4275,17 +4249,6 @@ rb_gc_vm_weak_table_foreach(vm_table_foreach_callback_func callback,
         );
         break;
       }
-      case RB_GC_VM_CC_REFINEMENT_TABLE: {
-        if (vm->cc_refinement_table) {
-            set_foreach_with_replace(
-                vm->cc_refinement_table,
-                vm_weak_table_cc_refinement_foreach,
-                vm_weak_table_cc_refinement_foreach_update_update,
-                (st_data_t)&foreach_data
-            );
-        }
-        break;
-      }
       case RB_GC_VM_WEAK_TABLE_COUNT:
         rb_bug("Unreachable");
       default:
diff --git a/gc/gc.h b/gc/gc.h
index 5979b4a00193e2..469a4902f03365 100644
--- a/gc/gc.h
+++ b/gc/gc.h
@@ -31,7 +31,6 @@ enum rb_gc_vm_weak_tables {
     RB_GC_VM_ID2REF_TABLE,
     RB_GC_VM_GENERIC_FIELDS_TABLE,
     RB_GC_VM_FROZEN_STRINGS_TABLE,
-    RB_GC_VM_CC_REFINEMENT_TABLE,
    RB_GC_VM_WEAK_TABLE_COUNT
 };
 
diff --git a/method.h b/method.h
index 260344d53b58ba..fdbab41cf71bba 100644
--- a/method.h
+++ b/method.h
@@ -264,7 +264,6 @@ void rb_scope_visibility_set(rb_method_visibility_t);
 VALUE rb_unnamed_parameters(int arity);
 
 void rb_vm_insert_cc_refinement(const struct rb_callcache *cc);
-void rb_vm_delete_cc_refinement(const struct rb_callcache *cc);
 
 void rb_clear_method_cache(VALUE klass_or_module, ID mid);
 void rb_clear_all_refinement_method_cache(void);
diff --git a/vm.c b/vm.c
index a078f9e7344e4c..4f9c1c7402dbf4 100644
--- a/vm.c
+++ b/vm.c
@@ -3236,6 +3236,7 @@ rb_vm_update_references(void *ptr)
     vm->self = rb_gc_location(vm->self);
     vm->mark_object_ary = rb_gc_location(vm->mark_object_ary);
     vm->orig_progname = rb_gc_location(vm->orig_progname);
+    vm->cc_refinement_set = rb_gc_location(vm->cc_refinement_set);
 
     if (vm->root_box)
        rb_box_gc_update_references(vm->root_box);
@@ -3324,6 +3325,7 @@ rb_vm_mark(void *ptr)
     rb_gc_mark_movable(vm->orig_progname);
     rb_gc_mark_movable(vm->coverages);
     rb_gc_mark_movable(vm->me2counter);
+    rb_gc_mark_movable(vm->cc_refinement_set);
 
     rb_gc_mark_values(RUBY_NSIG, vm->trap_list.cmd);
@@ -3414,10 +3416,6 @@ ruby_vm_destruct(rb_vm_t *vm)
         st_free_table(vm->ci_table);
         vm->ci_table = NULL;
     }
-    if (vm->cc_refinement_table) {
-        rb_set_free_table(vm->cc_refinement_table);
-        vm->cc_refinement_table = NULL;
-    }
     RB_ALTSTACK_FREE(vm->main_altstack);
 
     struct global_object_list *next;
@@ -3510,7 +3508,6 @@ vm_memsize(const void *ptr)
         vm_memsize_builtin_function_table(vm->builtin_function_table) +
         rb_id_table_memsize(vm->negative_cme_table) +
         rb_st_memsize(vm->overloaded_cme_table) +
-        rb_set_memsize(vm->cc_refinement_table) +
         vm_memsize_constant_cache()
     );
@@ -4736,6 +4733,8 @@ rb_vm_register_global_object(VALUE obj)
     }
 }
 
+VALUE rb_cc_refinement_set_create(void);
+
 void
 Init_vm_objects(void)
 {
@@ -4744,7 +4743,7 @@ Init_vm_objects(void)
     /* initialize mark object array, hash */
     vm->mark_object_ary = pin_array_list_new(Qnil);
     vm->ci_table = st_init_table(&vm_ci_hashtype);
-    vm->cc_refinement_table = rb_set_init_numtable();
+    vm->cc_refinement_set = rb_cc_refinement_set_create();
 }
 
 // Whether JIT is enabled or not, we need to load/undef `#with_jit` for other builtins.
diff --git a/vm_core.h b/vm_core.h
index 7fcf8ca5c14439..ebc3fc5d27b677 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -816,7 +816,7 @@ typedef struct rb_vm_struct {
     struct rb_id_table *negative_cme_table;
     st_table *overloaded_cme_table; // cme -> overloaded_cme
     set_table *unused_block_warning_table;
-    set_table *cc_refinement_table;
+    VALUE cc_refinement_set;
 
     // This id table contains a mapping from ID to ICs. It does this with ID
     // keys and nested st_tables as values.
    // The nested tables have ICs as keys
diff --git a/vm_method.c b/vm_method.c
index 5289bf03f8e741..dc314950df40d7 100644
--- a/vm_method.c
+++ b/vm_method.c
@@ -587,32 +587,6 @@ rb_invalidate_method_caches(struct rb_id_table *cm_tbl, VALUE cc_tbl)
     }
 }
 
-static int
-invalidate_cc_refinement(st_data_t key, st_data_t data)
-{
-    VALUE v = (VALUE)key;
-    void *ptr = rb_asan_poisoned_object_p(v);
-    rb_asan_unpoison_object(v, false);
-
-    if (rb_gc_pointer_to_heap_p(v) &&
-        !rb_objspace_garbage_object_p(v) &&
-        RBASIC(v)->flags) { // liveness check
-        const struct rb_callcache *cc = (const struct rb_callcache *)v;
-
-        VM_ASSERT(vm_cc_refinement_p(cc));
-
-        if (vm_cc_valid(cc)) {
-            vm_cc_invalidate(cc);
-        }
-    }
-
-    if (ptr) {
-        rb_asan_poison_object(v);
-    }
-
-    return ST_CONTINUE;
-}
-
 static st_index_t
 vm_ci_hash(VALUE v)
 {
@@ -722,28 +696,94 @@ rb_vm_ci_free(const struct rb_callinfo *ci)
     st_delete(vm->ci_table, &key, NULL);
 }
 
-void
-rb_vm_insert_cc_refinement(const struct rb_callcache *cc)
+struct cc_refinement_entries {
+    VALUE *entries;
+    size_t len;
+    size_t capa;
+};
+
+static void
+cc_refinement_set_free(void *ptr)
 {
-    st_data_t key = (st_data_t)cc;
+    struct cc_refinement_entries *e = ptr;
+    xfree(e->entries);
+}
 
-    rb_vm_t *vm = GET_VM();
-    RB_VM_LOCK_ENTER();
-    {
-        rb_set_insert(vm->cc_refinement_table, key);
+static size_t
+cc_refinement_set_memsize(const void *ptr)
+{
+    const struct cc_refinement_entries *e = ptr;
+    return e->capa * sizeof(VALUE);
+}
+
+static void
+cc_refinement_set_compact(void *ptr)
+{
+    struct cc_refinement_entries *e = ptr;
+    for (size_t i = 0; i < e->len; i++) {
+        e->entries[i] = rb_gc_location(e->entries[i]);
     }
-    RB_VM_LOCK_LEAVE();
 }
 
-void
-rb_vm_delete_cc_refinement(const struct rb_callcache *cc)
+static void
+cc_refinement_set_handle_weak_references(void *ptr)
 {
-    ASSERT_vm_locking();
+    struct cc_refinement_entries *e = ptr;
+    size_t write = 0;
+    for (size_t read = 0; read < e->len; read++) {
+        if (rb_gc_handle_weak_references_alive_p(e->entries[read])) {
+            e->entries[write++] = e->entries[read];
+        }
+    }
+    e->len = write;
+}
+
+static const rb_data_type_t cc_refinement_set_type = {
+    "VM/cc_refinement_set",
+    {
+        NULL,
+        cc_refinement_set_free,
+        cc_refinement_set_memsize,
+        cc_refinement_set_compact,
+        cc_refinement_set_handle_weak_references,
+    },
+    0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE
+};
+
+VALUE
+rb_cc_refinement_set_create(void)
+{
+    struct cc_refinement_entries *e;
+    VALUE obj = TypedData_Make_Struct(0, struct cc_refinement_entries, &cc_refinement_set_type, e);
+
+    e->entries = NULL;
+    e->len = 0;
+    e->capa = 0;
+
+    rb_gc_declare_weak_references(obj);
+
+    return obj;
+}
 
+void
+rb_vm_insert_cc_refinement(const struct rb_callcache *cc)
+{
     rb_vm_t *vm = GET_VM();
-    st_data_t key = (st_data_t)cc;
+    RB_VM_LOCK_ENTER();
+    {
+        struct cc_refinement_entries *e = RTYPEDDATA_GET_DATA(vm->cc_refinement_set);
+        if (e->len == e->capa) {
+            size_t new_capa = e->capa == 0 ? 16 : e->capa * 2;
+            SIZED_REALLOC_N(e->entries, VALUE, new_capa, e->capa);
+            e->capa = new_capa;
+        }
+        e->entries[e->len++] = (VALUE)cc;
 
-    rb_set_table_delete(vm->cc_refinement_table, &key);
+        // We never mark the cc, but we need to issue a write barrier so that
+        // the refinement set can be added to the remembered set
+        RB_OBJ_WRITTEN(vm->cc_refinement_set, Qundef, (VALUE)cc);
+    }
+    RB_VM_LOCK_LEAVE();
 }
 
 void
@@ -753,9 +793,23 @@ rb_clear_all_refinement_method_cache(void)
 
     RB_VM_LOCK_ENTER();
     {
-        rb_set_table_foreach(vm->cc_refinement_table, invalidate_cc_refinement, (st_data_t)NULL);
-        rb_set_table_clear(vm->cc_refinement_table);
-        rb_set_compact_table(vm->cc_refinement_table);
+        struct cc_refinement_entries *e = RTYPEDDATA_GET_DATA(vm->cc_refinement_set);
+        for (size_t i = 0; i < e->len; i++) {
+            VALUE v = e->entries[i];
+
+            // All objects should be live as weak references are pruned in
+            // cc_refinement_set_handle_weak_references
+            VM_ASSERT(rb_gc_pointer_to_heap_p(v));
+            VM_ASSERT(!rb_objspace_garbage_object_p(v));
+
+            const struct rb_callcache *cc = (const struct rb_callcache *)v;
+            VM_ASSERT(vm_cc_refinement_p(cc));
+
+            if (vm_cc_valid(cc)) {
+                vm_cc_invalidate(cc);
+            }
+        }
+        e->len = 0;
     }
     RB_VM_LOCK_LEAVE();

From 4455fdc201981122c67e9913d972b3cbd7b93e47 Mon Sep 17 00:00:00 2001
From: John Hawthorn
Date: Tue, 24 Feb 2026 20:38:36 -0800
Subject: [PATCH 2/3] Mark callcache as !needs_cleanup_p

---
 gc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gc.c b/gc.c
index bc5a523f711b5c..26e65a90f3bc74 100644
--- a/gc.c
+++ b/gc.c
@@ -1278,6 +1278,7 @@ rb_gc_obj_needs_cleanup_p(VALUE obj)
       case imemo_ifunc:
       case imemo_memo:
      case imemo_svar:
+      case imemo_callcache:
       case imemo_throw_data:
         return false;
       default:

From b4aa7462be9913d1d3739bf56a0a71a763a5d4a3 Mon Sep 17 00:00:00 2001
From: ReLU <82130499+ykawabata17@users.noreply.github.com>
Date: Fri, 27 Feb 2026 01:43:37 +0900
Subject: [PATCH 3/3] ZJIT: Fix ArgumentError for hash as positional arg to keyword-only method (GH-16223)

---
 zjit/src/codegen_tests.rs | 32 ++++++++++++++++++++++++++++++++
 zjit/src/hir.rs           | 28 +++++++++++++++++-----------
 2 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/zjit/src/codegen_tests.rs b/zjit/src/codegen_tests.rs
index ae5841ea7ddda5..980441c49104d9 100644
--- a/zjit/src/codegen_tests.rs
+++ b/zjit/src/codegen_tests.rs
@@ -821,6 +821,38 @@ fn test_send_kwarg_optional_static_with_side_exit() {
     "), @"[10, 2, 10]");
 }
 
+#[test]
+fn test_send_hash_to_kwarg_only_method() {
+    assert_snapshot!(inspect(r#"
+        def callee(a:) = a
+
+        def entry
+          callee({a: 1})
+        rescue ArgumentError
+          "ArgumentError"
+        end
+
+        entry
+        entry
+    "#), @r#""ArgumentError""#);
+}
+
+#[test]
+fn test_send_hash_to_optional_kwarg_only_method() {
+    assert_snapshot!(inspect(r#"
+        def callee(a: nil) = a
+
+        def entry
+          callee({a: 1})
+        rescue ArgumentError
+          "ArgumentError"
+        end
+
+        entry
+        entry
+    "#), @r#""ArgumentError""#);
+}
+
 #[test]
 fn test_send_all_arg_types() {
     assert_snapshot!(inspect("
diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs
index e188f4aca89826..ba6ce8365eb4a1 100644
--- a/zjit/src/hir.rs
+++ b/zjit/src/hir.rs
@@ -1913,31 +1913,37 @@ fn can_direct_send(function: &mut Function, block: BlockId, iseq: *const rb_iseq
         return false;
     }
 
-    // Because we exclude e.g. post parameters above, they are also excluded from the sum below.
+    // Because we exclude e.g. post parameters above, they are also excluded from the checks below.
     let lead_num = params.lead_num;
     let opt_num = params.opt_num;
     let keyword = params.keyword;
     let kw_req_num = if keyword.is_null() { 0 } else { unsafe { (*keyword).required_num } };
     let kw_total_num = if keyword.is_null() { 0 } else { unsafe { (*keyword).num } };
 
-    // Minimum args: all required positional + all required keywords
-    let min_argc = lead_num + kw_req_num;
-    // Maximum args: all positional (required + optional) + all keywords (required + optional)
-    let max_argc = lead_num + opt_num + kw_total_num;
+    let kwarg = unsafe { rb_vm_ci_kwarg(ci) };
+    let caller_kw_count = if kwarg.is_null() { 0 } else { (unsafe { get_cikw_keyword_len(kwarg) }) as usize };
+    let caller_positional = match args.len().checked_sub(caller_kw_count) {
+        Some(count) => count,
+        None => {
+            function.set_dynamic_send_reason(send_insn, ArgcParamMismatch);
+            return false;
+        }
+    };
 
-    can_send = c_int::try_from(args.len())
+    let positional_ok = c_int::try_from(caller_positional)
         .as_ref()
-        .map(|argc| (min_argc..=max_argc).contains(argc))
+        .map(|argc| (lead_num..=lead_num + opt_num).contains(argc))
         .unwrap_or(false);
-    if !can_send {
+    let keyword_ok = c_int::try_from(caller_kw_count)
+        .as_ref()
+        .map(|argc| (kw_req_num..=kw_total_num).contains(argc))
+        .unwrap_or(false);
+    if !positional_ok || !keyword_ok {
         function.set_dynamic_send_reason(send_insn, ArgcParamMismatch);
         return false
     }
 
     // asm.ccall() doesn't support 6+ args. Compute the final argc after keyword setup:
     // final_argc = caller's positional args + callee's total keywords (all kw slots are filled).
-    let kwarg = unsafe { rb_vm_ci_kwarg(ci) };
-    let caller_kw_count = if kwarg.is_null() { 0 } else { (unsafe { get_cikw_keyword_len(kwarg) }) as usize };
-    let caller_positional = args.len() - caller_kw_count;
 
     // Right now, the JIT entrypoint accepts the block as a param
     // We may remove it, remove the block_arg addition to match
     // See: https://github.com/ruby/ruby/pull/15911#discussion_r2710544982
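
A note on the pattern in PATCH 1/3: deletes from the refinement set only ever happen in bulk, during the GC's weak-reference pass, so a flat array with in-place compaction can replace the set_table without losing anything. The sketch below shows the two operations in isolation. It is a minimal standalone sketch, not the patch itself: the struct and function names are illustrative, a caller-supplied alive_p predicate stands in for rb_gc_handle_weak_references_alive_p(), and plain realloc stands in for SIZED_REALLOC_N.

#include <stdint.h>
#include <stdlib.h>

struct weak_entries {
    uintptr_t *entries;
    size_t len;
    size_t capa;
};

/* Amortized O(1) append, doubling capacity from an initial 16 slots,
 * mirroring the growth strategy in rb_vm_insert_cc_refinement(). */
static void
weak_entries_append(struct weak_entries *e, uintptr_t v)
{
    if (e->len == e->capa) {
        size_t new_capa = e->capa == 0 ? 16 : e->capa * 2;
        e->entries = realloc(e->entries, new_capa * sizeof(*e->entries));
        if (!e->entries) abort(); /* a real implementation reports OOM */
        e->capa = new_capa;
    }
    e->entries[e->len++] = v;
}

/* In-place compaction keeping only live entries, preserving order:
 * the same read/write two-cursor loop as
 * cc_refinement_set_handle_weak_references(). */
static void
weak_entries_prune(struct weak_entries *e, int (*alive_p)(uintptr_t))
{
    size_t write = 0;
    for (size_t read = 0; read < e->len; read++) {
        if (alive_p(e->entries[read])) {
            e->entries[write++] = e->entries[read];
        }
    }
    e->len = write;
}

Because pruning runs once per weak-reference pass and visits each element once, it is O(n) per GC and keeps survivors contiguous. That invariant is also what lets rb_clear_all_refinement_method_cache() walk the entries with plain VM_ASSERTs instead of the per-element liveness checks the old invalidate_cc_refinement() needed.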
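For PATCH 3/3, the bug is arithmetic: the old check compared the caller's total argc against the single combined range lead_num + kw_req_num ..= lead_num + opt_num + kw_total_num, so callee({a: 1}) against def callee(a:) passed (1 lies in 1..=1) even though the hash is a positional argument and the method accepts zero positionals. A minimal C sketch of the corrected per-kind check (struct and names hypothetical, mirroring the Rust logic in can_direct_send):

#include <stdbool.h>
#include <stddef.h>

/* Hypothetical mirror of the callee parameter counts read from the iseq. */
struct callee_params {
    int lead_num;     /* required positional parameters */
    int opt_num;      /* optional positional parameters */
    int kw_req_num;   /* required keyword parameters */
    int kw_total_num; /* required + optional keyword parameters */
};

/* Split the caller's argc into positional and keyword parts and check
 * each against its own range, instead of checking only the sum. */
static bool
arity_ok(const struct callee_params *p, size_t argc, size_t caller_kw_count)
{
    if (caller_kw_count > argc) return false; /* matches the checked_sub guard */
    size_t pos = argc - caller_kw_count;

    bool positional_ok = pos >= (size_t)p->lead_num
                      && pos <= (size_t)(p->lead_num + p->opt_num);
    bool keyword_ok = caller_kw_count >= (size_t)p->kw_req_num
                   && caller_kw_count <= (size_t)p->kw_total_num;
    return positional_ok && keyword_ok;
}

With def callee(a:) the counts are {0, 0, 1, 1}, and callee({a: 1}) arrives with argc = 1 and caller_kw_count = 0 (a hash literal produces no ci kwarg), so positional_ok fails, the send stays dynamic, and the interpreter raises the ArgumentError the new tests expect; the old sum check accepted the call and direct-dispatched the hash as if it were a keyword.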