From fe107d4af9c52a17494e0a01746faedb0daf66b1 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Wed, 11 Mar 2026 17:00:31 -0400 Subject: [PATCH] Move genDispatch0 to macro assembler --- src/engine/x86-64/X86_64Interpreter.v3 | 67 +++++------------------ src/engine/x86-64/X86_64MacroAssembler.v3 | 40 +++++++++++++- 2 files changed, 54 insertions(+), 53 deletions(-) diff --git a/src/engine/x86-64/X86_64Interpreter.v3 b/src/engine/x86-64/X86_64Interpreter.v3 index 29307ca98..26b03aaac 100644 --- a/src/engine/x86-64/X86_64Interpreter.v3 +++ b/src/engine/x86-64/X86_64Interpreter.v3 @@ -784,7 +784,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { var pos = w.atEnd().pos; var refsndary = t.1; computeCurIpForTrap(-1); - genDispatch0(ip_ptr, refsndary, true); + masm.genDispatch0(ic, ip_ptr, refsndary, true); // main[prefix] = prefix's table writeDispatchEntry(ref0, prefix, pos); } @@ -801,7 +801,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { // happens only with non-minimal encoding genSkipLeb(); asm.d.and_r_i(r_tmp0, 0x7F); // clear upper bit - genDispatch0(null, refsndary, false); // use secondary table + masm.genDispatch0(ic, null, refsndary, false); // use secondary table } else { // code needs to grab next byte to see what to do asm.movd_r_r(r_tmp2, r_tmp0); // save opcode @@ -833,7 +833,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { // 1000 0000 -> 0000 0001 upper bit 0, LEB continues // 1000 0001 -> 0000 0011 upper bit 1, LEB continues // entries 0-3 of the LEB table are otherwise unused :-) - genDispatch0(null, refleb, false); + masm.genDispatch0(ic, null, refleb, false); // create and install the four dispatch sequences for mapped second byte @@ -843,7 +843,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { // not used when encoding is minimal asm.movd_r_r(r_tmp0, r_tmp2); asm.d.and_r_i(r_tmp0, 0x7F); - genDispatch0(null, refsndary, false); + masm.genDispatch0(ic, null, refsndary, false); writeDispatchEntry(refleb, 0x00, leb00_pos); var leb01_pos = w.pos; @@ -853,14 +853,14 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { genSkipLeb(); asm.movd_r_r(r_tmp0, r_tmp2); // EBM check this! asm.d.and_r_i(r_tmp0, 0x7F); - genDispatch0(null, refsndary, false); + masm.genDispatch0(ic, null, refsndary, false); writeDispatchEntry(refleb, 0x01, leb01_pos); var leb02_pos = w.pos; // upper bit of opcode is 1, no LEB continuation: // dispatch through LEB, don't clear upper bit asm.movd_r_r(r_tmp0, r_tmp2); - genDispatch0(null, refleb, false); + masm.genDispatch0(ic, null, refleb, false); writeDispatchEntry(refleb, 0x02, leb02_pos); var leb03_pos = w.pos; @@ -870,7 +870,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { // not used when encoding is minimal genSkipLeb(); asm.movd_r_r(r_tmp0, r_tmp2); - genDispatch0(null, refleb, false); + masm.genDispatch0(ic, null, refleb, false); writeDispatchEntry(refleb, 0x03, leb03_pos); } for (i = 128; i < 256; i++) { @@ -899,7 +899,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { var sub_index_leb = sub_index | byte.view(0x80); var refsub = t.4; - genDispatch0(ip_ptr, refsub, true); + masm.genDispatch0(ic, ip_ptr, refsub, true); // all uses of a subpage will have the LEB bit set writeDispatchEntry(refsndary, sub_index_leb, subpage_pos); @@ -913,7 +913,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { var subpage_leb_pos = w.pos; genSkipLeb(); asm.d.and_r_i(r_tmp0, 0x7F); - genDispatch0(null, refsub, false); + masm.genDispatch0(ic, null, refsub, false); for (i = 128; i < 256; ++i) { writeDispatchEntry(refsub, i, subpage_leb_pos); } @@ -1013,7 +1013,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.inc_r(r_ip); asm.cmp_r_i(opcode, Opcode.BLOCK.code); asm.jc_rel_near(C.Z, repeated_block); - genDispatch0(null, null, false); // inlined rest of dispatch + masm.genDispatch0(ic, null, null, false); // inlined rest of dispatch } else { endHandler(); } @@ -2646,7 +2646,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.movq_r_m(origIp, r_func_decl.plus(offsets.FuncDecl_orig_bytecode)); asm.add_r_r(origIp, pc); asm.sub_r_i(origIp, 1); - genDispatch0(origIp.indirect(), dispatchTables[0].1, false); + masm.genDispatch0(ic, origIp.indirect(), dispatchTables[0].1, false); } // Specialized handler for whamm probes, uses a trampoline. if (FastIntTuning.enableWhammProbeTrampoline) { @@ -2694,7 +2694,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.add_r_r(origIp, pc); asm.sub_r_i(origIp, 1); // Dispatch to the original bytecode at this pc. - genDispatch0(origIp.indirect(), dispatchTables[0].1, false); + masm.genDispatch0(ic, origIp.indirect(), dispatchTables[0].1, false); } // Specialized handler for breakpoint probe, issues int(3), which gdb sees as a signal. if (FastIntTuning.enableWhammProbeTrampoline) { @@ -2709,7 +2709,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { masm.emit_v3_FuncDecl_orig_bytecode_r_r(xenv.tmp0, xenv.func_decl); asm.add_r_r(origIp, pc); asm.sub_r_i(origIp, 1); - genDispatch0(origIp.indirect(), dispatchTables[0].1, false); + masm.genDispatch0(ic, origIp.indirect(), dispatchTables[0].1, false); } } def genGlobalProbeSupport() { @@ -2720,7 +2720,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { } genGlobalProbeCall(); asm.sub_r_i(r_ip, 1); - genDispatch0(r_ip.indirect(), dispatchTables[0].1, true); + masm.genDispatch0(ic, r_ip.indirect(), dispatchTables[0].1, true); // LOOP, BLOCK: don't tier up or skip blocks; call global probe and skip block type offset = w.atEnd().pos; @@ -3999,44 +3999,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { } // Generate a dispatch from the main dispatch table. def genDispatch() { - genDispatch0(ip_ptr, if (FeatureDisable.globalProbes, dispatchTables[0].1), true); - } - // Generate a load of the next bytecode and a dispatch through the dispatch table. - def genDispatch0(ptr: X86_64Addr, table: IcCodeRef, increment: bool) { - var opcode = r_tmp0; - var base = r_tmp1; - if (ptr != null) asm.movbzx_r_m(opcode, ptr); - if (increment) asm.inc_r(r_ip); - match (FastIntTuning.dispatchEntrySize) { - 2 => { - if (table == null) asm.movq_r_r(base, r_dispatch); - else asm.lea(base, table); // RIP-relative LEA - asm.movwsx_r_m(opcode, base.plusR(opcode, 2, 0)); // load 16-bit offset - asm.add_r_r(base, opcode); - if (dispatchJmpOffset < 0) dispatchJmpOffset = w.pos; - asm.ijmp_r(base); - } - 4 => { - if (table == null) { - asm.movd_r_m(base, r_dispatch.plusR(opcode, 4, 0)); - } else { - var addr = ic.start + table.offset; - asm.movd_r_m(base, X86_64Addr.new(null, opcode, 4, int.!(addr - Pointer.NULL))); - } - if (dispatchJmpOffset < 0) dispatchJmpOffset = w.pos; - asm.ijmp_r(base); - } - 8 => { - if (table == null) { - if (dispatchJmpOffset < 0) dispatchJmpOffset = w.pos; - asm.ijmp_m(r_dispatch.plusR(opcode, 8, 0)); - } else { - var addr = ic.start + table.offset; - if (dispatchJmpOffset < 0) dispatchJmpOffset = w.pos; - asm.ijmp_m(X86_64Addr.new(null, opcode, 8, int.!(addr - Pointer.NULL))); - } - } - } + masm.genDispatch0(ic, ip_ptr, if (FeatureDisable.globalProbes, dispatchTables[0].1), true); } // Patch the dispatch table for the given opcode to go to the given position. def patchDispatchTable(opcode: Opcode, pos: int) { diff --git a/src/engine/x86-64/X86_64MacroAssembler.v3 b/src/engine/x86-64/X86_64MacroAssembler.v3 index 181aa9696..2d0e497b1 100644 --- a/src/engine/x86-64/X86_64MacroAssembler.v3 +++ b/src/engine/x86-64/X86_64MacroAssembler.v3 @@ -92,6 +92,44 @@ class X86_64MacroAssembler extends MacroAssembler { return Reg(0); } + // Generate a load of the next bytecode and a dispatch through the dispatch table. + def genDispatch0(ic: X86_64InterpreterCode, ptr: X86_64Addr, table: IcCodeRef, increment: bool) { + def xenv: IntExecEnv = X86_64MasmRegs.INT_EXEC_ENV; + def opcode = G(xenv.tmp0); + def base = G(xenv.tmp1); + def r_ip = G(xenv.ip); + def r_dispatch = G(xenv.dispatch); + + if (ptr != null) asm.movbzx_r_m(opcode, ptr); + if (increment) asm.inc_r(r_ip); + match (FastIntTuning.dispatchEntrySize) { + 2 => { + if (table == null) asm.movq_r_r(base, r_dispatch); + else asm.lea(base, table); // RIP-relative LEA + asm.movwsx_r_m(opcode, base.plusR(opcode, 2, 0)); // load 16-bit offset + asm.add_r_r(base, opcode); + asm.ijmp_r(base); + } + 4 => { + if (table == null) { + asm.movd_r_m(base, r_dispatch.plusR(opcode, 4, 0)); + } else { + var addr = ic.start + table.offset; + asm.movd_r_m(base, X86_64Addr.new(null, opcode, 4, int.!(addr - Pointer.NULL))); + } + asm.ijmp_r(base); + } + 8 => { + if (table == null) { + asm.ijmp_m(r_dispatch.plusR(opcode, 8, 0)); + } else { + var addr = ic.start + table.offset; + asm.ijmp_m(X86_64Addr.new(null, opcode, 8, int.!(addr - Pointer.NULL))); + } + } + } + } + def emit_intentional_crash() { recordCurSourceLoc(); asm.invalid(); @@ -1700,4 +1738,4 @@ def TRUNC_i32_f64_u = FloatTrunc.new(false, true, false); def TRUNC_i64_f32_s = FloatTrunc.new(true, false, true); def TRUNC_i64_f32_u = FloatTrunc.new(true, false, false); def TRUNC_i64_f64_s = FloatTrunc.new(true, true, true); -def TRUNC_i64_f64_u = FloatTrunc.new(true, true, false); \ No newline at end of file +def TRUNC_i64_f64_u = FloatTrunc.new(true, true, false);