From 142750e7b99974f274bd3354dd61447ba83df5f2 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Thu, 29 Jan 2026 17:43:03 -0500 Subject: [PATCH 1/2] [WIP] Migrate VspHelper to use MasmAddr --- src/engine/compiler/MacroAssembler.v3 | 1 + src/engine/compiler/RegSet.v3 | 8 +- src/engine/x86-64/X86_64Interpreter.v3 | 689 +++++++++++----------- src/engine/x86-64/X86_64MacroAssembler.v3 | 132 +++++ 4 files changed, 481 insertions(+), 349 deletions(-) diff --git a/src/engine/compiler/MacroAssembler.v3 b/src/engine/compiler/MacroAssembler.v3 index d3868cf1e..fbd5d836f 100644 --- a/src/engine/compiler/MacroAssembler.v3 +++ b/src/engine/compiler/MacroAssembler.v3 @@ -299,6 +299,7 @@ class MacroAssembler(valuerep: Tagging, regConfig: RegConfig) { def emit_binop_r_r(op: Opcode, reg: Reg, reg2: Reg); def emit_binop_r_m(op: Opcode, reg: Reg, addr: MasmAddr); + def emit_binop_m_r(op: Opcode, addr: MasmAddr, reg: Reg); def emit_binop_r_i(op: Opcode, reg: Reg, val: int); def emit_pop_r(kind: ValueKind, reg: Reg); diff --git a/src/engine/compiler/RegSet.v3 b/src/engine/compiler/RegSet.v3 index 0d4be1dda..4d7257990 100644 --- a/src/engine/compiler/RegSet.v3 +++ b/src/engine/compiler/RegSet.v3 @@ -3,7 +3,11 @@ // Architecture-independent representation of a register for use in {MacroAssembler} and portable // parts of compilers. Kept small to keep data structures small. The name is stored in a {RegSet}. -type Reg(index: byte) #unboxed { } +type Reg(index: byte) #unboxed { + def plus(offset: int) -> MasmAddr { + return MasmAddr(this, offset); + } +} // Describes the set of (maximum 256) registers for a target. // By convention, register #0 is reserved for indicating an unallocated register or no register. 
@@ -49,4 +53,4 @@ class RegPool32(regs: Array) { map = Array.new(max + 1); for (i < regs.length) map[regs[i].index] = u5.!(i); } -} \ No newline at end of file +} diff --git a/src/engine/x86-64/X86_64Interpreter.v3 b/src/engine/x86-64/X86_64Interpreter.v3 index 30b4090da..2980390c7 100644 --- a/src/engine/x86-64/X86_64Interpreter.v3 +++ b/src/engine/x86-64/X86_64Interpreter.v3 @@ -220,8 +220,8 @@ class IntExecEnv { // Internal register configuration for variables live in the interpreter execution context. def R: X86_64Regs, GPRs = X86_64Regs.GPRs, C: X86_64Conds; // Helper for various slot addresses. -type SlotAddrs(tag: X86_64Addr, value: X86_64Addr, upper: X86_64Addr) #unboxed { } -class VspHelper(vsp: X86_64Gpr, valuerep: Tagging, depth: int) { +type SlotAddrs(tag: MasmAddr, value: MasmAddr, upper: MasmAddr) #unboxed { } +class VspHelper(vsp: Reg, valuerep: Tagging, depth: int) { private def slots = Array.new(depth + 1); new() { for (i < slots.length) { @@ -242,6 +242,9 @@ def TYPE_IS_LEB: byte = 0x40; def LEB_UPPER_BIT: byte = 0x80; def G = X86_64MasmRegs.toGpr; def X = X86_64MasmRegs.toXmmr; +def A(ma: MasmAddr) -> X86_64Addr { + return X86_64Addr.new(G(ma.base), null, 1, ma.offset); +} // Generates {X86_64InterpreterCode} for X86-64. 
class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { @@ -263,8 +266,8 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { var hostTailCallStubLabel = masm.newLabel(-1); var hostCallStubLabel = masm.newLabel(-1); var spcEntryLabel = X86_64Label.new(); - var controlFallThruLabel = X86_64Label.new(); - var controlTransferLabel = X86_64Label.new(); + var controlFallThruLabel = masm.newLabel(-1); + var controlTransferLabel = masm.newLabel(-1); var controlSkipSidetableAndDispatchLabel = X86_64Label.new(); var probedDispatchTableRef: IcCodeRef; var typeTagTableOffset: int; @@ -323,7 +326,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { def k_frame_size = X86_64InterpreterFrame.size; - def vsph = VspHelper.new(r_vsp, valuerep, 3); + def vsph = VspHelper.new(xenv.vsp, valuerep, 3); def dispatchTables = Array<(byte, IcCodeRef, IcCodeRef, byte, IcCodeRef)>.new( Opcodes.code_pages.length + Opcodes.num_subpages + 1); @@ -676,8 +679,8 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.d.cmp_r_i(numGpr, 0); asm.jc_rel_near(C.Z, done2); asm.bind(start2); - genTagPushR(typeGpr); // *(sp) = type - asm.movq_m_i(vsph[0].value, 0); // *(sp + 8) = 0 + genTagPushR(xenv.tmp2 /* XXX typeGpr */); // *(sp) = type + masm.emit_mov_m_l(vsph[0].value, 0); // *(sp + 8) = 0 incrementVsp(); // sp += 1 slot // gen: while (--num != 0) asm.d.dec_r(numGpr); @@ -705,8 +708,8 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.q.cmp_r_r(r_start, r_end); asm.jc_rel_far(C.GE, retpoint); asm.movbzx_r_m(r_tag, r_start.plus(0)); - genTagPushR(r_tag); // *(sp) = type - asm.movq_m_i(vsph[0].value, 0); // *(sp + 8) = 0 + genTagPushR(xenv.tmp3 /* XXX r_tag */); // *(sp) = type + masm.emit_mov_m_l(vsph[0].value, 0); // *(sp + 8) = 0 incrementVsp(); // sp += 1 slot // gen: while (--num != 0) asm.q.inc_r(r_start); @@ -956,14 +959,14 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: 
DataWriter) { bindHandler(Opcode.I32_CONST); { genReadSleb32_inline(r_tmp1); genTagPush(BpTypeCode.I32.code); - asm.movq_m_r(vsph[0].value, r_tmp1); + masm.emit_mov_m_r(ValueKind.I64, vsph[0].value, xenv.tmp1); incrementVsp(); endHandler(); } bindHandler(Opcode.I64_CONST); { genReadSleb64_inline(r_tmp1); genTagPush(BpTypeCode.I64.code); - asm.movq_m_r(vsph[0].value, r_tmp1); + masm.emit_mov_m_r(ValueKind.I64, vsph[0].value, xenv.tmp1); incrementVsp(); endHandler(); } @@ -971,7 +974,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.movd_r_m(r_tmp0, ip_ptr); asm.add_r_i(r_ip, 4); genTagPush(BpTypeCode.F32.code); - asm.movq_m_r(vsph[0].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I64, vsph[0].value, xenv.tmp0); incrementVsp(); endHandler(); } @@ -979,7 +982,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.movq_r_m(r_tmp0, ip_ptr); asm.add_r_i(r_ip, 8); genTagPush(BpTypeCode.F64.code); - asm.movq_m_r(vsph[0].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I64, vsph[0].value, xenv.tmp0); incrementVsp(); endHandler(); } @@ -1018,13 +1021,12 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { endHandler(); } - var ctl_xfer_nostack = X86_64Label.new(); + var ctl_xfer_nostack = masm.newLabel(-1); // IF: check condition and either fall thru to next bytecode or ctl xfer (without stack copying) bindHandler(Opcode.IF); decrementVsp(); - asm.d.cmp_m_i(vsph[0].value, 0); - asm.jc_rel_far(C.Z, ctl_xfer_nostack); // XXX: can be near if no complex block types + masm.emit_br_m(vsph[0].value, MasmBrCond.I32_ZERO, ctl_xfer_nostack); // XXX: can be near if no complex block types genSkipBlockType(); asm.bind(controlSkipSidetableAndDispatchLabel); genSkipSidetableEntry(); @@ -1033,19 +1035,18 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { // BR_IF: check condition and either fall thru to next bytecode or ctl xfer (with stack copying) bindHandler(Opcode.BR_IF); decrementVsp(); - 
asm.d.cmp_m_i(vsph[0].value, 0); - asm.jc_rel_far(C.Z, controlFallThruLabel); // XXX: move shared fallthrough closer? + masm.emit_br_m(vsph[0].value, MasmBrCond.I32_ZERO, controlFallThruLabel); // XXX: can be near if no complex block types // fallthru to BR // BR: unconditional ctl xfer with stack copying bindHandlerNoAlign(Opcode.BR); - asm.bind(controlTransferLabel); + masm.bindLabel(controlTransferLabel); var popcount = r_tmp0; var valcount = r_tmp1; // if popcount > 0 asm.movd_r_m(popcount, r_stp.plus(Sidetable_BrEntry.popcount.offset)); asm.d.cmp_r_i(popcount, 0); - asm.jc_rel_near(C.Z, ctl_xfer_nostack); + asm.jc_rel_near(C.Z, ctl_xfer_nostack.label); // load valcount asm.movd_r_m(valcount, r_stp.plus(Sidetable_BrEntry.valcount.offset)); // popcount = popcount * SLOT_SIZE @@ -1059,7 +1060,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { var loop = X86_64Label.new(); asm.bind(loop); asm.d.dec_r(valcount); - asm.jc_rel_near(C.S, ctl_xfer_nostack); + asm.jc_rel_near(C.S, ctl_xfer_nostack.label); genCopySlot(r_vsp.plus(0), r_vsp.plusR(popcount, 1, 0)); incrementVsp(); asm.jmp_rel_near(loop); @@ -1068,7 +1069,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { bindHandlerNoAlign(Opcode.CATCH); bindHandlerNoAlign(Opcode.CATCH_ALL); bindHandlerNoAlign(Opcode.ELSE); - asm.bind(ctl_xfer_nostack); + masm.bindLabel(ctl_xfer_nostack); if (FastIntTuning.fourByteSidetable) { // load and sign-extend a 4-byte pc delta asm.movd_r_m(r_tmp0, r_stp.plus(Sidetable_BrEntry.pc_delta.offset)); asm.q.shl_r_i(r_tmp0, 32); @@ -1092,7 +1093,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { var max = r_tmp0, key = r_tmp1; asm.movd_r_m(max, r_stp.plus(Sidetable_BrEntry.pc_delta.offset)); decrementVsp(); - asm.movd_r_m(key, vsph[0].value); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp1 /* key */, vsph[0].value); asm.d.cmp_r_r(key, max); var ok = X86_64Label.new(); asm.jc_rel_near(C.NC, ok); @@ -1102,40 +1103,37 @@ 
class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.q.add_r_r(r_ip, max); asm.shl_r_i(max, u6.!(Ints.log(u32.!(Sidetable_BrEntry.size)))); asm.q.add_r_r(r_stp, max); - asm.jmp_rel_near(controlTransferLabel); + masm.emit_br(controlTransferLabel); // BR_ON_NULL: check condition and either fall thru to next bytecode or ctl xfer (with stack copying) bindHandler(Opcode.BR_ON_NULL); - asm.q.cmp_m_i(vsph[-1].value, 0); - asm.jc_rel_near(C.NZ, controlFallThruLabel); + masm.emit_br_m(vsph[-1].value, MasmBrCond.REF_NULL, controlFallThruLabel); decrementVsp(); - asm.jmp_rel_near(controlTransferLabel); + masm.emit_br(controlTransferLabel); // BR_ON_NON_NULL: check condition and either fall thru to next bytecode or ctl xfer (with stack copying) bindHandler(Opcode.BR_ON_NON_NULL); - asm.q.cmp_m_i(vsph[-1].value, 0); - asm.jc_rel_near(C.NZ, controlTransferLabel); + masm.emit_br_m(vsph[-1].value, MasmBrCond.REF_NONNULL, controlTransferLabel); decrementVsp(); // shared code for not-taken banches - asm.bind(controlFallThruLabel); + masm.bindLabel(controlFallThruLabel); genSkipLeb(); genSkipSidetableEntry(); endHandler(); bindHandler(Opcode.SELECT); { - var label = X86_64Label.new(); - asm.d.cmp_m_i(vsph[-1].value, 0); - asm.jc_rel_near(C.NZ, label); + var label = masm.newLabel(-1); + masm.emit_br_m(vsph[-1].value, MasmBrCond.I32_NONZERO, label); // false case; copy false value down if (valuerep.value_size == 16) { - asm.movdqu_s_m(r_xmm0, vsph[-2].value); - asm.movdqu_m_s(vsph[-3].value, r_xmm0); + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm0, vsph[-2].value); // XXX not really the exact valuekind + masm.emit_mov_m_r(ValueKind.V128, vsph[-3].value, xenv.xmm0); } else { - asm.movq_r_m(r_tmp0, vsph[-2].value); - asm.movq_m_r(vsph[-3].value, r_tmp0); + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp0, vsph[-2].value); // XXX not exact valuekind + masm.emit_mov_m_r(ValueKind.I64, vsph[-3].value, xenv.tmp0); } // true case, nothing to do - asm.bind(label); + 
masm.bindLabel(label); adjustVsp(-2); endHandler(); } @@ -1149,7 +1147,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.jc_rel_near(C.NZ, skip); asm.d.shl_r_i(r_tmp0, valuerep.slot_size_log); - asm.movd_r_m(r_tmp1, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp1, vsph[-1].value); asm.sub_r_r(r_vsp, r_tmp0); decrementVsp(); // XXX: combine with above using lea asm.d.cmp_r_i(r_tmp1, 0); @@ -1208,7 +1206,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { bindHandler(Opcode.THROW_REF); { computeCurIpForTrap(-1); computePcFromCurIp(); - asm.movd_r_m(r_tmp0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp0, vsph[-1].value); decrementVsp(); masm.emit_get_curstack(xenv.tmp1); saveCallerIVars(); @@ -1259,10 +1257,10 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.d.shl_r_i(r_tmp0, valuerep.slot_size_log); decrementVsp(); if (valuerep.value_size == 16) { - asm.movdqu_s_m(r_xmm0, vsph[0].value); + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm0, vsph[0].value); asm.movdqu_m_s(r_vfp.plusR(r_tmp0, 1, valuerep.tag_size), r_xmm0); } else { - asm.movq_r_m(r_tmp1, vsph[0].value); + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp1, vsph[0].value); asm.movq_m_r(r_vfp.plusR(r_tmp0, 1, valuerep.tag_size), r_tmp1); } endHandler(); @@ -1271,10 +1269,10 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { genReadUleb32(r_tmp0); asm.d.shl_r_i(r_tmp0, valuerep.slot_size_log); if (valuerep.value_size == 16) { - asm.movdqu_s_m(r_xmm0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm0, vsph[-1].value); asm.movdqu_m_s(r_vfp.plusR(r_tmp0, 1, valuerep.tag_size), r_xmm0); } else { - asm.movq_r_m(r_tmp1, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp1, vsph[-1].value); asm.movq_m_r(r_vfp.plusR(r_tmp0, 1, valuerep.tag_size), r_tmp1); } endHandler(); @@ -1349,7 +1347,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { 
computeCurIpForTrap(-1); genSkipLeb(); // skip signature index decrementVsp(); - asm.movq_r_m(func_arg, vsph[0].value); + masm.emit_mov_r_m(ValueKind.REF, xenv.func_arg, vsph[0].value); asm.q.cmp_r_i(func_arg, 0); asm.jc_rel_near(X86_64Conds.NZ, callFunction); asm.jmp_rel_far(newTrapLabel(TrapReason.NULL_DEREF)); @@ -1393,7 +1391,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { computeCurIpForTrap(-1); genSkipLeb(); // skip signature index decrementVsp(); - asm.movq_r_m(func_arg, vsph[0].value); + masm.emit_mov_r_m(ValueKind.REF, xenv.func_arg, vsph[0].value); asm.q.cmp_r_i(func_arg, 0); asm.jc_rel_near(X86_64Conds.NZ, tailCallFunction); asm.jmp_rel_far(newTrapLabel(TrapReason.NULL_DEREF)); @@ -1411,7 +1409,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { genReadUleb32(table_index); decrementVsp(); - asm.movd_r_m(func_index, vsph[0].value); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp0 /* XXX func_index */, vsph[0].value); var tmp = r_tmp2; var sig_id = r_tmp3; @@ -1490,21 +1488,21 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.jc_rel_near(C.Z, runtime_call); // do inline global get or set if (t.0 == Opcode.GLOBAL_GET) { - genTagPushR(r_tmp1); // set the tag + genTagPushR(xenv.tmp1); // set the tag if (valuerep.value_size == 16) { asm.movdqu_s_m(r_xmm0, r_global.plus(offsets.Global_low)); - asm.movdqu_m_s(vsph[0].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.V128, vsph[0].value, xenv.xmm0); } else { asm.movq_r_m(r_tmp0, r_global.plus(offsets.Global_low)); - asm.movq_m_r(vsph[0].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I64, vsph[0].value, xenv.tmp0); } incrementVsp(); } else { if (valuerep.value_size == 16) { - asm.movdqu_s_m(r_xmm0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm0, vsph[-1].value); asm.movdqu_m_s(r_global.plus(offsets.Global_low), r_xmm0); } else { - asm.movq_r_m(r_tmp0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp0, 
vsph[-1].value); asm.movq_m_r(r_global.plus(offsets.Global_low), r_tmp0); } decrementVsp(); @@ -1621,11 +1619,9 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { (Opcode.I32_GE_U, C.NC) ]) { bindHandler(t.0); - asm.movd_r_m(r_tmp0, vsph[-1].value); - asm.d.cmp_m_r(vsph[-2].value, r_tmp0); - asm.set_r(t.1, r_tmp0); - asm.movbzx_r_r(r_tmp0, r_tmp0); - asm.movd_m_r(vsph[-2].value, r_tmp0); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp0, vsph[-1].value); + masm.emit_cmpd_m_r(t.1, vsph[-2].value, xenv.tmp0); + masm.emit_mov_m_r(ValueKind.I32, vsph[-2].value, xenv.tmp0); decrementVsp(); endHandler(); } @@ -1644,12 +1640,10 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { (Opcode.I64_GE_U, C.NC) ]) { bindHandler(t.0); - asm.movq_r_m(r_tmp0, vsph[-1].value); - asm.q.cmp_m_r(vsph[-2].value, r_tmp0); - asm.set_r(t.1, r_tmp0); - asm.movbzx_r_r(r_tmp0, r_tmp0); - asm.movq_m_r(vsph[-2].value, r_tmp0); - if (valuerep.tagged) asm.movq_m_i(vsph[-2].tag, BpTypeCode.I32.code); + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp0, vsph[-1].value); + masm.emit_cmpq_m_r(t.1, vsph[-2].value, xenv.tmp0); + masm.emit_mov_m_r(ValueKind.I64, vsph[-2].value, xenv.tmp0); + if (valuerep.tagged) masm.emit_mov_m_l(vsph[-2].tag, BpTypeCode.I32.code); // XXX check this decrementVsp(); endHandler(); } @@ -1657,37 +1651,30 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { } def genI32Arith() { bindHandler(Opcode.I32_EQZ); { - asm.d.test_m_i(vsph[-1].value, -1); + masm.emit_testd_m_i(vsph[-1].value, -1); asm.set_r(C.Z, r_tmp0); asm.movbzx_r_r(r_tmp0, r_tmp0); - asm.movd_m_r(vsph[-1].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I32, vsph[-1].value, xenv.tmp0); endHandler(); } bindHandler(Opcode.I32_CLZ); { - asm.movd_r_i(r_tmp1, -1); - asm.d.bsr_r_m(r_tmp0, vsph[-1].value); - asm.d.cmov_r(C.Z, r_tmp0, r_tmp1); - asm.movd_r_i(r_tmp1, 31); - asm.d.sub_r_r(r_tmp1, r_tmp0); - asm.movd_m_r(vsph[-1].value, r_tmp1); + 
masm.emit_i32_clz_r_m(xenv.tmp0, vsph[-1].value); endHandler(); } bindHandler(Opcode.I32_CTZ); { - asm.d.bsf_r_m(r_tmp0, vsph[-1].value); - asm.movd_r_i(r_tmp1, 32); - asm.d.cmov_r(C.Z, r_tmp0, r_tmp1); - asm.movd_m_r(vsph[-1].value, r_tmp0); + masm.emit_i32_ctz_r_m(xenv.tmp0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.I32, vsph[-1].value, xenv.tmp0); // XXX check this endHandler(); } bindHandler(Opcode.I32_POPCNT); { - asm.d.popcnt_r_m(r_tmp0, vsph[-1].value); - asm.movd_m_r(vsph[-1].value, r_tmp0); + masm.emit_popcntd_r_m(xenv.tmp0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.I32, vsph[-1].value, xenv.tmp0); endHandler(); } bindHandler(Opcode.I32_MUL); { - asm.movd_r_m(r_tmp0, vsph[-1].value); - asm.d.imul_r_m(r_tmp0, vsph[-2].value); - asm.movd_m_r(vsph[-2].value, r_tmp0); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp0, vsph[-1].value); + masm.emit_binop_r_m(Opcode.I32_MUL, xenv.tmp0, vsph[-2].value); + masm.emit_mov_m_r(ValueKind.I32, vsph[-2].value, xenv.tmp0); decrementVsp(); endHandler(); } @@ -1699,27 +1686,29 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { ]) { bindHandler(t.0); computeCurIpForTrap(-1); - asm.movd_r_m(r_tmp0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp0, vsph[-1].value); spillReg(R.RAX); spillReg(R.RDX); - asm.movd_r_m(R.RAX, vsph[-2].value); + def a = X86_64Addr.new(G(vsph[-2].value.base), null, 1, vsph[-2].value.offset); + // XXX x86-64 address used here + asm.movd_r_m(R.RAX, a); t.1(r_tmp0); - asm.movd_m_r(vsph[-2].value, t.2); + asm.movd_m_r(a, t.2); restoreReg(R.RAX); restoreReg(R.RDX); decrementVsp(); endHandler(); } for (t in [ - (Opcode.I32_ADD, asm.d.add_m_r), - (Opcode.I32_SUB, asm.d.sub_m_r), - (Opcode.I32_AND, asm.d.and_m_r), - (Opcode.I32_OR, asm.d.or_m_r), - (Opcode.I32_XOR, asm.d.xor_m_r) + Opcode.I32_ADD, + Opcode.I32_SUB, + Opcode.I32_AND, + Opcode.I32_OR, + Opcode.I32_XOR ]) { - bindHandler(t.0); - asm.movd_r_m(r_tmp0, vsph[-1].value); - t.1(vsph[-2].value, r_tmp0); + 
bindHandler(t); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp0, vsph[-1].value); + masm.emit_binop_m_r(t, vsph[-2].value, xenv.tmp0); decrementVsp(); endHandler(); } @@ -1731,46 +1720,42 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { (Opcode.I32_ROTR, asm.d.ror_m_cl) ]) { bindHandler(t.0); - asm.movd_r_m(R.RCX, vsph[-1].value); - t.1(vsph[-2].value); + def a1 = X86_64Addr.new(G(vsph[-1].value.base), null, 1, vsph[-1].value.offset); + def a2 = X86_64Addr.new(G(vsph[-2].value.base), null, 1, vsph[-2].value.offset); + // XXX x86-64 address used here + asm.movd_r_m(R.RCX, a1); + t.1(a2); decrementVsp(); endHandler(); } } def genI64Arith() { bindHandler(Opcode.I64_EQZ); { - asm.q.test_m_i(vsph[-1].value, -1); + masm.emit_testq_m_i(vsph[-1].value, -1); asm.set_r(C.Z, r_tmp0); asm.movbzx_r_r(r_tmp0, r_tmp0); - asm.movd_m_r(vsph[-1].value, r_tmp0); - if (valuerep.tagged) asm.movd_m_i(vsph[-1].tag, BpTypeCode.I32.code); + masm.emit_mov_m_r(ValueKind.I32, vsph[-1].value, xenv.tmp0); + if (valuerep.tagged) masm.emit_mov_m_i(vsph[-1].tag, BpTypeCode.I32.code); endHandler(); } bindHandler(Opcode.I64_CLZ); { - asm.movq_r_i(r_tmp1, -1); - asm.q.bsr_r_m(r_tmp0, vsph[-1].value); - asm.q.cmov_r(C.Z, r_tmp0, r_tmp1); - asm.movd_r_i(r_tmp1, 63); - asm.q.sub_r_r(r_tmp1, r_tmp0); - asm.movq_m_r(vsph[-1].value, r_tmp1); + masm.emit_i64_clz_r_m(xenv.tmp0, vsph[-1].value); endHandler(); } bindHandler(Opcode.I64_CTZ); { - asm.q.bsf_r_m(r_tmp0, vsph[-1].value); - asm.movd_r_i(r_tmp1, 64); - asm.q.cmov_r(C.Z, r_tmp0, r_tmp1); - asm.movq_m_r(vsph[-1].value, r_tmp0); + masm.emit_i64_ctz_r_m(xenv.tmp0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.I64, vsph[-1].value, xenv.tmp0); endHandler(); } bindHandler(Opcode.I64_POPCNT); { - asm.q.popcnt_r_m(r_tmp0, vsph[-1].value); - asm.movq_m_r(vsph[-1].value, r_tmp0); + masm.emit_popcntq_r_m(xenv.tmp0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.I64, vsph[-1].value, xenv.tmp0); endHandler(); } bindHandler(Opcode.I64_MUL); { 
- asm.movq_r_m(r_tmp0, vsph[-1].value); - asm.q.imul_r_m(r_tmp0, vsph[-2].value); - asm.movq_m_r(vsph[-2].value, r_tmp0); + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp0, vsph[-1].value); + masm.emit_binop_r_m(Opcode.I32_MUL, xenv.tmp0, vsph[-2].value); + masm.emit_mov_m_r(ValueKind.I64, vsph[-2].value, xenv.tmp0); decrementVsp(); endHandler(); } @@ -1782,27 +1767,29 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { ]) { bindHandler(t.0); computeCurIpForTrap(-1); - asm.movq_r_m(r_tmp0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp0, vsph[-1].value); spillReg(R.RAX); spillReg(R.RDX); - asm.movq_r_m(R.RAX, vsph[-2].value); + def a = X86_64Addr.new(G(vsph[-2].value.base), null, 1, vsph[-2].value.offset); + // XXX x86-64 registers used here + asm.movq_r_m(R.RAX, a); t.1(r_tmp0); - asm.movq_m_r(vsph[-2].value, t.2); + asm.movq_m_r(a, t.2); restoreReg(R.RAX); restoreReg(R.RDX); decrementVsp(); endHandler(); } for (t in [ - (Opcode.I64_ADD, asm.q.add_m_r), - (Opcode.I64_SUB, asm.q.sub_m_r), - (Opcode.I64_AND, asm.q.and_m_r), - (Opcode.I64_OR, asm.q.or_m_r), - (Opcode.I64_XOR, asm.q.xor_m_r) + Opcode.I64_ADD, + Opcode.I64_SUB, + Opcode.I64_AND, + Opcode.I64_OR, + Opcode.I64_XOR ]) { - bindHandler(t.0); - asm.movq_r_m(r_tmp0, vsph[-1].value); - t.1(vsph[-2].value, r_tmp0); + bindHandler(t); + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp0, vsph[-1].value); + masm.emit_binop_m_r(t, vsph[-2].value, xenv.tmp0); decrementVsp(); endHandler(); } @@ -1814,8 +1801,11 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { (Opcode.I64_ROTR, asm.q.ror_m_cl) ]) { bindHandler(t.0); - asm.movq_r_m(R.RCX, vsph[-1].value); - t.1(vsph[-2].value); + def a1 = X86_64Addr.new(G(vsph[-1].value.base), null, 1, vsph[-1].value.offset); + def a2 = X86_64Addr.new(G(vsph[-2].value.base), null, 1, vsph[-2].value.offset); + // XXX x86-64 address used here + asm.movq_r_m(R.RCX, a1); + t.1(a2); decrementVsp(); endHandler(); } @@ -1922,20 +1912,20 @@ class 
X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { } genReadUleb32(r_tmp0); // decode offset if (isCmpAndExchange) { - asm.movq_r_m(r_tmp1, vsph[-3].value); // read index + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp1, vsph[-3].value); // read index asm.q.add_r_r(r_tmp0, r_tmp1); // add index + offset - asm.movq_r_m(r_tmp1, vsph[-2].value); // new value for cmpxchg + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp1, vsph[-2].value); // new value for cmpxchg spillReg(R.RAX); - asm.movq_r_m(R.RAX, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I64, xenv.ret_throw /* XXX rax */, vsph[-1].value); asm.lock(); op(r_mem0_base.plusR(r_tmp0, 1, 0), r_tmp1); // asm.movq_r_m(r_tmp1, r_mem0_base.plusR(r_tmp0, 1, 0)); // This will return the return of the operation restoreReg(R.RAX); // Restore the original RAX if it was used elsewhere decrementVsp(); } else { - asm.movq_r_m(r_tmp1, vsph[-2].value); // read index + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp1, vsph[-2].value); // read index asm.q.add_r_r(r_tmp0, r_tmp1); // add index + offset - asm.movq_r_m(r_tmp1, vsph[-1].value); // read value + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp1, vsph[-1].value); // read value if (neg != null) { neg(r_tmp1); } else if (exchange != null) { @@ -1944,7 +1934,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.lock(); op(r_mem0_base.plusR(r_tmp0, 1, 0), r_tmp1); // asm.movq_r_m(r_tmp1, r_mem0_base.plusR(r_tmp0, 1, 0)); // This will return the return of the operation - asm.xchgq_m_r(vsph[-1].value, r_tmp1); + masm.emit_xchgq_m_r(vsph[-1].value, xenv.tmp1); } asm.bind(finish); endHandler(); @@ -1958,20 +1948,20 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { genReadUleb32(r_tmp0); // decode offset if (isCmpAndExchange) { - asm.movq_r_m(r_tmp1, vsph[-3].value); // read index + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp1, vsph[-3].value); // read index asm.q.add_r_r(r_tmp0, r_tmp1); // add index + offset - asm.movq_r_m(r_tmp1, 
vsph[-2].value); // new value for cmpxchg + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp1, vsph[-2].value); // new value for cmpxchg spillReg(R.RAX); - asm.movq_r_m(R.RAX, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I64, xenv.ret_throw /* XXX rax */, vsph[-1].value); asm.lock(); op(r_mem0_base.plusR(r_tmp0, 1, 0), r_tmp1); // asm.movq_r_m(r_tmp1, r_mem0_base.plusR(r_tmp0, 1, 0)); // This will return the return of the operation restoreReg(R.RAX); // Restore the original RAX if it was used elsewhere decrementVsp(); } else { - asm.movq_r_m(r_tmp1, vsph[-2].value); // read index + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp1, vsph[-2].value); // read index asm.q.add_r_r(r_tmp0, r_tmp1); // add index + offset - asm.movq_r_m(r_tmp1, vsph[-1].value); // read value + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp1, vsph[-1].value); // read value if (neg != null) { neg(r_tmp1); } else if (exchange != null) { @@ -1980,7 +1970,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.lock(); op(r_mem0_base.plusR(r_tmp0, 1, 0), r_tmp1); // asm.movq_r_m(r_tmp1, r_mem0_base.plusR(r_tmp0, 1, 0)); // This will return the return of the operation - asm.xchgq_m_r(vsph[-1].value, r_tmp1); + masm.emit_xchgq_m_r(vsph[-1].value, xenv.tmp1); } asm.jmp_rel_near(finish); } @@ -2022,89 +2012,91 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { endHandler(); } bindHandler(Opcode.I32_EXTEND8_S); { - asm.d.movbsx_r_m(r_tmp0, vsph[-1].value); - asm.movd_m_r(vsph[-1].value, r_tmp0); + masm.emit_i32_extend8_s_r_m(xenv.tmp0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.I32, vsph[-1].value, xenv.tmp0); endHandler(); } bindHandler(Opcode.I32_EXTEND16_S); { - asm.d.movwsx_r_m(r_tmp0, vsph[-1].value); - asm.movd_m_r(vsph[-1].value, r_tmp0); + masm.emit_i32_extend16_s_r_m(xenv.tmp0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.I32, vsph[-1].value, xenv.tmp0); endHandler(); } bindHandler(Opcode.I64_EXTEND8_S); { - asm.q.movbsx_r_m(r_tmp0, vsph[-1].value); - 
asm.movq_m_r(vsph[-1].value, r_tmp0); + masm.emit_i64_extend8_s_r_m(xenv.tmp0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.I64, vsph[-1].value, xenv.tmp0); endHandler(); } bindHandler(Opcode.I64_EXTEND16_S); { - asm.q.movwsx_r_m(r_tmp0, vsph[-1].value); - asm.movq_m_r(vsph[-1].value, r_tmp0); + masm.emit_i64_extend16_s_r_m(xenv.tmp0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.I64, vsph[-1].value, xenv.tmp0); endHandler(); } bindHandler(Opcode.I64_EXTEND_I32_S); bindHandler(Opcode.I64_EXTEND32_S); { genTagUpdate(BpTypeCode.I64.code); - asm.movd_r_m(r_tmp0, vsph[-1].value); + // TODO see emit_movq_32s_r_m + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp0, vsph[-1].value); asm.q.shl_r_i(r_tmp0, 32); asm.q.sar_r_i(r_tmp0, 32); - asm.movq_m_r(vsph[-1].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I64, vsph[-1].value, xenv.tmp0); endHandler(); } bindHandler(Opcode.I64_EXTEND_I32_U); { genTagUpdate(BpTypeCode.I64.code); - asm.movd_m_i(vsph[-1].value.plus(4), 0); // zero upper portion + masm.emit_mov_m_i(vsph[-1].value.plus(4), 0); // zero upper portion endHandler(); } } def genF32Arith() { bindHandler(Opcode.F32_ABS); { - asm.d.and_m_i(vsph[-1].value, 0x7FFFFFFF); // explicit update of upper word + masm.emit_and_m_i(vsph[-1].value, 0x7FFFFFFF); // explicit update of upper word endHandler(); } bindHandler(Opcode.F32_NEG); { - asm.d.xor_m_i(vsph[-1].value, 0x80000000); // explicit update of upper word + masm.emit_xor_m_i(vsph[-1].value, 0x80000000); // explicit update of upper word endHandler(); } + // TODO make into loop bindHandler(Opcode.F32_ADD); { - asm.movss_s_m(r_xmm0, vsph[-2].value); - asm.addss_s_m(r_xmm0, vsph[-1].value); - asm.movss_m_s(vsph[-2].value, r_xmm0); + masm.emit_mov_r_m(ValueKind.F32, xenv.xmm0, vsph[-2].value); + masm.emit_binop_r_m(Opcode.F32_ADD, xenv.xmm0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.F32, vsph[-2].value, xenv.xmm0); decrementVsp(); endHandler(); } bindHandler(Opcode.F32_SUB); { - asm.movss_s_m(r_xmm0, vsph[-2].value); - 
asm.subss_s_m(r_xmm0, vsph[-1].value); - asm.movss_m_s(vsph[-2].value, r_xmm0); + masm.emit_mov_r_m(ValueKind.F32, xenv.xmm0, vsph[-2].value); + masm.emit_binop_r_m(Opcode.F32_SUB, xenv.xmm0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.F32, vsph[-2].value, xenv.xmm0); decrementVsp(); endHandler(); } bindHandler(Opcode.F32_MUL); { - asm.movss_s_m(r_xmm0, vsph[-2].value); - asm.mulss_s_m(r_xmm0, vsph[-1].value); - asm.movss_m_s(vsph[-2].value, r_xmm0); + masm.emit_mov_r_m(ValueKind.F32, xenv.xmm0, vsph[-2].value); + masm.emit_binop_r_m(Opcode.F32_MUL, xenv.xmm0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.F32, vsph[-2].value, xenv.xmm0); decrementVsp(); endHandler(); } bindHandler(Opcode.F32_DIV); { - asm.movss_s_m(r_xmm0, vsph[-2].value); - asm.divss_s_m(r_xmm0, vsph[-1].value); - asm.movss_m_s(vsph[-2].value, r_xmm0); + masm.emit_mov_r_m(ValueKind.F32, xenv.xmm0, vsph[-2].value); + masm.emit_binop_r_m(Opcode.F32_DIV, xenv.xmm0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.F32, vsph[-2].value, xenv.xmm0); decrementVsp(); endHandler(); } bindHandler(Opcode.F32_SQRT); { - asm.sqrtss_s_m(r_xmm0, vsph[-1].value); - asm.movss_m_s(vsph[-1].value, r_xmm0); + masm.emit_sqrtf_r_m(xenv.xmm0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.F32, vsph[-1].value, xenv.xmm0); endHandler(); } bindHandler(Opcode.F32_COPYSIGN); { - asm.movd_r_m(r_tmp0, vsph[-2].value); // XXX: tradeoff between memory operands and extra regs? - asm.d.and_r_i(r_tmp0, 0x7FFFFFFF); - asm.movd_r_m(r_tmp1, vsph[-1].value); - asm.d.and_r_i(r_tmp1, 0x80000000); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp0, vsph[-2].value); // XXX: tradeoff between memory operands and extra regs? 
+ masm.emit_and_r_i(xenv.tmp0, 0x7FFFFFFF); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp1, vsph[-1].value); + masm.emit_and_r_i(xenv.tmp1, 0x80000000); asm.d.or_r_r(r_tmp0, r_tmp1); - asm.movd_m_r(vsph[-2].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I32, vsph[-2].value, xenv.tmp0); decrementVsp(); endHandler(); } @@ -2115,60 +2107,61 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { (Opcode.F32_NEAREST, X86_64Rounding.TO_NEAREST) ]) { bindHandler(t.0); - asm.roundss_s_m(r_xmm0, vsph[-1].value, t.1); - asm.movss_m_s(vsph[-1].value, r_xmm0); + masm.emit_roundf_r_m(xenv.xmm0, vsph[-1].value, t.1); + masm.emit_mov_m_r(ValueKind.F32, vsph[-1].value, xenv.xmm0); endHandler(); } } def genF64Arith() { bindHandler(Opcode.F64_ABS); { - asm.d.and_m_i(vsph[-1].upper, 0x7FFFFFFF); + masm.emit_and_m_i(vsph[-1].upper, 0x7FFFFFFF); endHandler(); } bindHandler(Opcode.F64_NEG); { - asm.d.xor_m_i(vsph[-1].upper, 0x80000000); + masm.emit_xor_m_i(vsph[-1].upper, 0x80000000); endHandler(); } bindHandler(Opcode.F64_ADD); { - asm.movsd_s_m(r_xmm0, vsph[-2].value); - asm.addsd_s_m(r_xmm0, vsph[-1].value); - asm.movsd_m_s(vsph[-2].value, r_xmm0); + masm.emit_mov_r_m(ValueKind.F64, xenv.xmm0, vsph[-2].value); + masm.emit_binop_r_m(Opcode.F64_ADD, xenv.xmm0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.F64, vsph[-2].value, xenv.xmm0); decrementVsp(); endHandler(); } bindHandler(Opcode.F64_SUB); { - asm.movsd_s_m(r_xmm0, vsph[-2].value); - asm.subsd_s_m(r_xmm0, vsph[-1].value); - asm.movsd_m_s(vsph[-2].value, r_xmm0); + masm.emit_mov_r_m(ValueKind.F64, xenv.xmm0, vsph[-2].value); + masm.emit_binop_r_m(Opcode.F64_SUB, xenv.xmm0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.F64, vsph[-2].value, xenv.xmm0); decrementVsp(); endHandler(); } bindHandler(Opcode.F64_MUL); { - asm.movsd_s_m(r_xmm0, vsph[-2].value); - asm.mulsd_s_m(r_xmm0, vsph[-1].value); - asm.movsd_m_s(vsph[-2].value, r_xmm0); - decrementVsp(); + masm.emit_mov_r_m(ValueKind.F64, xenv.xmm0, 
vsph[-2].value); + masm.emit_binop_r_m(Opcode.F64_MUL, xenv.xmm0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.F64, vsph[-2].value, xenv.xmm0); + decrementVsp(); endHandler(); } bindHandler(Opcode.F64_DIV); { - asm.movsd_s_m(r_xmm0, vsph[-2].value); - asm.divsd_s_m(r_xmm0, vsph[-1].value); - asm.movsd_m_s(vsph[-2].value, r_xmm0); + masm.emit_mov_r_m(ValueKind.F64, xenv.xmm0, vsph[-2].value); + masm.emit_binop_r_m(Opcode.F64_DIV, xenv.xmm0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.F64, vsph[-2].value, xenv.xmm0); decrementVsp(); endHandler(); } bindHandler(Opcode.F64_SQRT); { - asm.sqrtsd_s_m(r_xmm0, vsph[-1].value); - asm.movsd_m_s(vsph[-1].value, r_xmm0); + masm.emit_sqrtd_r_m(xenv.xmm0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.F64, vsph[-1].value, xenv.xmm0); endHandler(); } bindHandler(Opcode.F64_COPYSIGN); { - asm.movd_r_m(r_tmp0, vsph[-2].upper); // XXX: tradeoff between memory operands and extra regs? - asm.d.and_r_i(r_tmp0, 0x7FFFFFFF); - asm.movd_r_m(r_tmp1, vsph[-1].upper); - asm.d.and_r_i(r_tmp1, 0x80000000); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp0, vsph[-2].upper); // XXX: tradeoff between memory operands and extra regs? 
+ masm.emit_and_r_i(xenv.tmp0, 0x7FFFFFFF); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp1, vsph[-1].upper); + masm.emit_and_r_i(xenv.tmp1, 0x80000000); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp1, vsph[-1].value); asm.d.or_r_r(r_tmp0, r_tmp1); - asm.movd_m_r(vsph[-2].upper, r_tmp0); + masm.emit_mov_m_r(ValueKind.I32, vsph[-2].upper, xenv.tmp0); decrementVsp(); endHandler(); } @@ -2179,8 +2172,8 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { (Opcode.F64_NEAREST, X86_64Rounding.TO_NEAREST) ]) { bindHandler(t.0); - asm.roundsd_s_m(r_xmm0, vsph[-1].value, t.1); - asm.movsd_m_s(vsph[-1].value, r_xmm0); + masm.emit_roundd_r_m(xenv.xmm0, vsph[-1].value, t.1); + masm.emit_mov_m_r(ValueKind.F64, vsph[-1].value, xenv.xmm0); endHandler(); } } @@ -2194,8 +2187,8 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { (Opcode.F32_LE, C.A), (Opcode.F32_GE, C.C)]) { bindHandler(t.0); - asm.movss_s_m(r_xmm0, vsph[-2].value); - asm.ucomiss_s_m(r_xmm0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.F32, xenv.xmm0, vsph[-2].value); + masm.emit_cmpf_r_m(xenv.xmm0, vsph[-1].value); asm.jc_rel_near(C.P, if(t.0 == Opcode.F32_NE, ret_one, ret_zero)); asm.jc_rel_near(t.1, ret_zero); asm.jmp_rel_near(ret_one); @@ -2204,13 +2197,13 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.bind(ret_zero); decrementVsp(); genTagUpdate(BpTypeCode.I32.code); - asm.movd_m_i(vsph[-1].value, 0); + masm.emit_mov_m_i(vsph[-1].value, 0); endHandler(); asm.bind(ret_one); decrementVsp(); genTagUpdate(BpTypeCode.I32.code); - asm.movd_m_i(vsph[-1].value, 1); + masm.emit_mov_m_i(vsph[-1].value, 1); endHandler(); // XXX: too far of a near jump to share these between f32 and f64 @@ -2224,8 +2217,8 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { (Opcode.F64_LE, C.A), (Opcode.F64_GE, C.C)]) { bindHandler(t.0); - asm.movsd_s_m(r_xmm0, vsph[-2].value); - asm.ucomisd_s_m(r_xmm0, vsph[-1].value); + 
masm.emit_mov_r_m(ValueKind.F64, xenv.xmm0, vsph[-2].value); + masm.emit_cmpd_r_m(xenv.xmm0, vsph[-1].value); asm.jc_rel_near(C.P, if(t.0 == Opcode.F64_NE, ret_one, ret_zero)); asm.jc_rel_near(t.1, ret_zero); asm.jmp_rel_near(ret_one); @@ -2234,49 +2227,49 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.bind(ret_zero); decrementVsp(); genTagUpdate(BpTypeCode.I32.code); - asm.movd_m_i(vsph[-1].value, 0); + masm.emit_mov_m_i(vsph[-1].value, 0); genDispatchOrJumpToDispatch(); asm.bind(ret_one); decrementVsp(); genTagUpdate(BpTypeCode.I32.code); - asm.movd_m_i(vsph[-1].value, 1); + masm.emit_mov_m_i(vsph[-1].value, 1); genDispatchOrJumpToDispatch(); } def genGcInstrs() { bindHandler(Opcode.REF_I31); { - asm.movd_r_m(r_tmp0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp0, vsph[-1].value); asm.d.shl_r_i(r_tmp0, 1); asm.d.or_r_i(r_tmp0, 1); - asm.movq_m_r(vsph[-1].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I64, vsph[-1].value, xenv.tmp0); genTagUpdate(BpTypeCode.I31REF.code); endHandler(); } bindHandler(Opcode.I31_GET_S); { - asm.movd_r_m(r_tmp0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp0, vsph[-1].value); asm.d.cmp_r_i(r_tmp0, 0); asm.jc_rel_far(X86_64Conds.Z, newTrapLabel(TrapReason.NULL_DEREF)); asm.d.sar_r_i(r_tmp0, 1); - asm.movq_m_r(vsph[-1].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I64, vsph[-1].value, xenv.tmp0); genTagUpdate(BpTypeCode.I32.code); endHandler(); } bindHandler(Opcode.I31_GET_U); { - asm.movd_r_m(r_tmp0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp0, vsph[-1].value); asm.d.cmp_r_i(r_tmp0, 0); asm.jc_rel_far(X86_64Conds.Z, newTrapLabel(TrapReason.NULL_DEREF)); asm.d.shr_r_i(r_tmp0, 1); - asm.movd_m_r(vsph[-1].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I32, vsph[-1].value, xenv.tmp0); genTagUpdate(BpTypeCode.I32.code); endHandler(); } bindHandler(Opcode.ARRAY_LEN); { - asm.movq_r_m(r_tmp0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp0, 
vsph[-1].value); asm.q.cmp_r_i(r_tmp0, 0); asm.jc_rel_far(X86_64Conds.Z, newTrapLabel(TrapReason.NULL_DEREF)); asm.movq_r_m(r_tmp0, r_tmp0.plus(offsets.HeapArray_vals)); asm.movd_r_m(r_tmp0, r_tmp0.plus(offsets.Array_length)); - asm.movd_m_r(vsph[-1].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I32, vsph[-1].value, xenv.tmp0); genTagUpdate(BpTypeCode.I32.code); endHandler(); } @@ -2291,7 +2284,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { callRuntime(refRuntimeCall(X86_64RT.runtime_doCast), [r_tmp3, r_instance, nullable_reg, r_tmp1], false); asm.movbzx_r_r(r_tmp0, Target.V3_RET_GPRS[0]); // XXX: restore just VSP and update first? restoreCallerIVars(); - asm.movd_m_r(vsph[-1].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I32, vsph[-1].value, xenv.tmp0); genTagUpdate(BpTypeCode.I32.code); endHandler(); // ref.test_null jumps back to ref.test @@ -2343,11 +2336,11 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { // TODO: we recompute the start of the current instruction from the curpc and the code object, // because the sidetable entry's delta_ip is encoded relative to 1 + curpc asm.lea(r_ip, X86_64Addr.new(r_tmp1, r_curpc, 1, 1 + offsets.Array_contents)); - asm.jmp_rel_far(controlTransferLabel); + masm.emit_br(controlTransferLabel); } bindHandler(Opcode.REF_AS_NON_NULL); { computeCurIpForTrap(-1); - asm.q.cmp_m_i(vsph[-1].value, 0); + masm.emit_cmpq_m_i(vsph[-1].value, 0); asm.jc_rel_far(C.Z, newTrapLabel(TrapReason.NULL_DEREF)); endHandler(); } @@ -2362,26 +2355,26 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.movq_r_m(r_tmp0, r_tmp0.plusR(r_tmp1, offsets.REF_SIZE, offsets.Array_contents)); asm.movd_r_m(r_tmp1, r_tmp0.plus(offsets.NativeWasmMemory_num_pages)); asm.movb_r_m(r_tmp0, r_tmp0.plus(offsets.NativeWasmMemory_index_tag)); - genTagPushR(r_tmp0); - asm.movq_m_r(vsph[0].value, r_tmp1); + genTagPushR(xenv.tmp0); + masm.emit_mov_m_r(ValueKind.I64, vsph[0].value, xenv.tmp1); 
incrementVsp(); endHandler(); } bindHandler(Opcode.REF_NULL); { genSkipLeb(); genTagPush(BpTypeCode.REF_NULL.code); - asm.movq_m_i(vsph[0].value, 0); + masm.emit_mov_m_l(vsph[0].value, 0); // XXX: only clear upper slot when REF is guaranteed a REF_U64 - asm.movq_m_i(vsph[0].value.plus(8), 0); + masm.emit_mov_m_l(vsph[0].value.plus(8), 0); incrementVsp(); endHandler(); } bindHandler(Opcode.REF_IS_NULL); { - asm.d.test_m_i(vsph[-1].value, -1); + masm.emit_testd_m_i(vsph[-1].value, -1); asm.set_r(C.Z, r_tmp0); asm.movbzx_r_r(r_tmp0, r_tmp0); - if (valuerep.tagged) asm.movd_m_i(vsph[-1].tag, i7.view(BpTypeCode.I32.code)); - asm.movd_m_r(vsph[-1].value, r_tmp0); + if (valuerep.tagged) masm.emit_mov_m_i(vsph[-1].tag, i7.view(BpTypeCode.I32.code)); + masm.emit_mov_m_r(ValueKind.I32, vsph[-1].value, xenv.tmp0); endHandler(); } bindHandler(Opcode.REF_FUNC); { @@ -2389,7 +2382,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.movq_r_m(r_tmp0, r_instance.plus(offsets.Instance_functions)); asm.movq_r_m(r_tmp0, r_tmp0.plusR(r_tmp1, offsets.REF_SIZE, offsets.Array_contents)); genTagPush(BpTypeCode.FUNCREF.code); - asm.movq_m_r(vsph[0].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I64, vsph[0].value, xenv.tmp0); incrementVsp(); endHandler(); } @@ -2410,10 +2403,10 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.movq_r_m(r_tmp0, r_instance.plus(offsets.Instance_tables)); asm.movq_r_m(r_tmp0, r_tmp0.plusR(r_tmp1, offsets.REF_SIZE, offsets.Array_contents)); asm.movb_r_m(r_tmp2, r_tmp0.plus(offsets.Table_index_tag)); - genTagPushR(r_tmp2); + genTagPushR(xenv.tmp2); asm.movq_r_m(r_tmp0, r_tmp0.plus(offsets.Table_elems)); asm.movd_r_m(r_tmp0, r_tmp0.plus(offsets.Array_length)); - asm.movq_m_r(vsph[0].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I64, vsph[0].value, xenv.tmp0); incrementVsp(); endHandler(); } @@ -2530,7 +2523,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { var cont = xenv.xmm0, ex = 
xenv.tmp0; var contStack = xenv.tmp1; genPopCont(X(cont)); - genPopInto(G(ex)); + genPopInto(ex); masm.emit_validate_and_consume_cont(contStack, cont); var curStack = xenv.tmp4; @@ -2743,35 +2736,35 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { def genFloatMinAndMax() { var ret_b = X86_64Label.new(), ret_a = X86_64Label.new(), is_nan32 = X86_64Label.new(), is_nan64 = X86_64Label.new(); bindHandler(Opcode.F32_MIN); - asm.movss_s_m(r_xmm0, vsph[-2].value); - asm.movss_s_m(r_xmm1, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.F32, xenv.xmm0, vsph[-2].value); + masm.emit_mov_r_m(ValueKind.F32, xenv.xmm1, vsph[-1].value); asm.ucomiss_s_s(r_xmm0, r_xmm1); asm.jc_rel_far(C.P, is_nan32); asm.jc_rel_near(C.C, ret_a); asm.jc_rel_near(C.A, ret_b); - asm.d.cmp_m_i(vsph[-1].value, 0); + masm.emit_cmpd_m_i(vsph[-1].value, 0); asm.jc_rel_near(C.S, ret_b); // handle min(-0, 0) == -0 asm.jmp_rel_near(ret_a); bindHandler(Opcode.F32_MAX); - asm.movss_s_m(r_xmm0, vsph[-2].value); - asm.movss_s_m(r_xmm1, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.F32, xenv.xmm0, vsph[-2].value); + masm.emit_mov_r_m(ValueKind.F32, xenv.xmm1, vsph[-1].value); asm.ucomiss_s_s(r_xmm0, r_xmm1); asm.jc_rel_far(C.P, is_nan32); asm.jc_rel_near(C.C, ret_b); asm.jc_rel_near(C.A, ret_a); - asm.d.cmp_m_i(vsph[-1].value, 0); + masm.emit_cmpd_m_i(vsph[-1].value, 0); asm.jc_rel_near(C.NS, ret_b); // handle max(-0, 0) == 0 asm.jmp_rel_near(ret_a); bindHandler(Opcode.F64_MIN); - asm.movsd_s_m(r_xmm0, vsph[-2].value); - asm.movsd_s_m(r_xmm1, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.F64, xenv.xmm0, vsph[-2].value); + masm.emit_mov_r_m(ValueKind.F64, xenv.xmm1, vsph[-1].value); asm.ucomisd_s_s(r_xmm0, r_xmm1); asm.jc_rel_near(C.P, is_nan64); asm.jc_rel_near(C.C, ret_a); asm.jc_rel_near(C.A, ret_b); - asm.d.cmp_m_i(vsph[-1].upper, 0); + masm.emit_cmpd_m_i(vsph[-1].upper, 0); asm.jc_rel_near(C.S, ret_b); // handle min(-0, 0) == -0 // fall through to ret_a asm.bind(ret_a); @@ -2779,27 
+2772,27 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { endHandler(); bindHandler(Opcode.F64_MAX); - asm.movsd_s_m(r_xmm0, vsph[-2].value); - asm.movsd_s_m(r_xmm1, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.F64, xenv.xmm0, vsph[-2].value); + masm.emit_mov_r_m(ValueKind.F64, xenv.xmm1, vsph[-1].value); asm.ucomisd_s_s(r_xmm0, r_xmm1); asm.jc_rel_near(C.P, is_nan64); asm.jc_rel_near(C.C, ret_b); asm.jc_rel_near(C.A, ret_a); - asm.d.cmp_m_i(vsph[-1].upper, 0); + masm.emit_cmpd_m_i(vsph[-1].upper, 0); asm.jc_rel_near(C.S, ret_a); // handle max(-0, 0) == 0 // fall through to ret_b asm.bind(ret_b); - asm.movsd_m_s(vsph[-2].value, r_xmm1); + masm.emit_mov_m_r(ValueKind.F64, vsph[-2].value, xenv.xmm1); decrementVsp(); endHandler(); asm.bind(is_nan32); - asm.movd_m_i(vsph[-2].value, int.view(FloatUtils.f_nan)); + masm.emit_mov_m_i(vsph[-2].value, int.view(FloatUtils.f_nan)); asm.jmp_rel_near(ret_a); asm.bind(is_nan64); - asm.movd_m_i(vsph[-2].upper, int.view(FloatUtils.d_nan >> 32)); - asm.movd_m_i(vsph[-2].value, 0); + masm.emit_mov_m_i(vsph[-2].upper, int.view(FloatUtils.d_nan >> 32)); + masm.emit_mov_m_i(vsph[-2].value, 0); asm.jmp_rel_near(ret_a); } def genFloatTruncs() { @@ -2824,16 +2817,16 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { // XXX: don't load current IP for saturating conversions computeCurIpForTrap(-1); // load value from stack - if (opcode.sig.params[0] == ValueType.F32) asm.movss_s_m(r_xmm0, vsph[-1].value); - else asm.movsd_s_m(r_xmm0, vsph[-1].value); + if (opcode.sig.params[0] == ValueType.F32) masm.emit_mov_r_m(ValueKind.F32, xenv.xmm0, vsph[-1].value); + else masm.emit_mov_r_m(ValueKind.F64, xenv.xmm0, vsph[-1].value); // emit conversion masm.emit_i_trunc_f(opcode, r_tmp0, r_xmm0, r_xmm1); // store and update tag if (opcode.sig.results[0] == ValueType.I32) { - asm.movd_m_r(vsph[-1].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I32, vsph[-1].value, xenv.tmp0); 
genTagUpdate(BpTypeCode.I32.code); } else { - asm.movq_m_r(vsph[-1].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I64, vsph[-1].value, xenv.tmp0); genTagUpdate(BpTypeCode.I64.code); } endHandler(); @@ -2842,74 +2835,76 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { def genFloatConversions() { bindHandler(Opcode.F32_CONVERT_I32_S); { genTagUpdate(BpTypeCode.F32.code); - asm.movd_r_m(r_tmp0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp0, vsph[-1].value); asm.q.shl_r_i(r_tmp0, 32); asm.q.sar_r_i(r_tmp0, 32); // sign-extend asm.cvtsi2ss_s_r(r_xmm0, r_tmp0); - asm.movss_m_s(vsph[-1].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.F32, vsph[-1].value, xenv.xmm0); endHandler(); } bindHandler(Opcode.F32_CONVERT_I32_U); { genTagUpdate(BpTypeCode.F32.code); - asm.movd_r_m(r_tmp0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp0, vsph[-1].value); asm.cvtsi2ss_s_r(r_xmm0, r_tmp0); - asm.movss_m_s(vsph[-1].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.F32, vsph[-1].value, xenv.xmm0); endHandler(); } bindHandler(Opcode.F32_CONVERT_I64_S); { genTagUpdate(BpTypeCode.F32.code); - asm.movq_r_m(r_tmp0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp0, vsph[-1].value); asm.cvtsi2ss_s_r(r_xmm0, r_tmp0); - asm.movss_m_s(vsph[-1].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.F32, vsph[-1].value, xenv.xmm0); endHandler(); } bindHandler(Opcode.F32_CONVERT_I64_U); { genTagUpdate(BpTypeCode.F32.code); - asm.movq_r_m(r_tmp0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp0, vsph[-1].value); masm.emit_f32_convert_i64_u(r_xmm0, r_tmp0, r_xmm1, r_scratch); - asm.movss_m_s(vsph[-1].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.F32, vsph[-1].value, xenv.xmm0); endHandler(); } bindHandler(Opcode.F32_DEMOTE_F64); { genTagUpdate(BpTypeCode.F32.code); - asm.cvtsd2ss_s_m(r_xmm0, vsph[-1].value); - asm.movss_m_s(vsph[-1].value, r_xmm0); + // XXX how come this is the only one that uses vsph in operation? 
+ masm.emit_demote_r_m(xenv.xmm0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.F32, vsph[-1].value, xenv.xmm0); endHandler(); } bindHandler(Opcode.F64_CONVERT_I32_S); { genTagUpdate(BpTypeCode.F64.code); - asm.movd_r_m(r_tmp0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp0, vsph[-1].value); asm.q.shl_r_i(r_tmp0, 32); asm.q.sar_r_i(r_tmp0, 32); // sign-extend asm.cvtsi2sd_s_r(r_xmm0, r_tmp0); - asm.movsd_m_s(vsph[-1].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.F64, vsph[-1].value, xenv.xmm0); endHandler(); } bindHandler(Opcode.F64_CONVERT_I32_U); { genTagUpdate(BpTypeCode.F64.code); - asm.movd_r_m(r_tmp0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp0, vsph[-1].value); asm.cvtsi2sd_s_r(r_xmm0, r_tmp0); - asm.movsd_m_s(vsph[-1].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.F64, vsph[-1].value, xenv.xmm0); endHandler(); } bindHandler(Opcode.F64_CONVERT_I64_S); { genTagUpdate(BpTypeCode.F64.code); - asm.movq_r_m(r_tmp0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp0, vsph[-1].value); asm.cvtsi2sd_s_r(r_xmm0, r_tmp0); - asm.movsd_m_s(vsph[-1].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.F64, vsph[-1].value, xenv.xmm0); endHandler(); } bindHandler(Opcode.F64_CONVERT_I64_U); { genTagUpdate(BpTypeCode.F64.code); - asm.movq_r_m(r_tmp0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp0, vsph[-1].value); masm.emit_f64_convert_i64_u(r_xmm0, r_tmp0, r_xmm1, r_scratch); - asm.movsd_m_s(vsph[-1].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.F64, vsph[-1].value, xenv.xmm0); endHandler(); } bindHandler(Opcode.F64_PROMOTE_F32); { genTagUpdate(BpTypeCode.F64.code); - asm.cvtss2sd_s_m(r_xmm0, vsph[-1].value); - asm.movsd_m_s(vsph[-1].value, r_xmm0); + // XXX how come this is the only two that uses vsph in operation? 
+ masm.emit_promote_r_m(xenv.xmm0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.F64, vsph[-1].value, xenv.xmm0); endHandler(); } } @@ -2986,24 +2981,24 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { // helper method to move values from vsp to registers // vsp[-2] -> s0, vsp[-1] -> s1 - def load_v128_s_s(s0: X86_64Xmmr, s1: X86_64Xmmr) { - asm.movdqu_s_m(s0, vsph[-2].value); - asm.movdqu_s_m(s1, vsph[-1].value); + def load_v128_s_s(s0: Reg, s1: Reg) { + masm.emit_mov_r_m(ValueKind.V128, s0, vsph[-2].value); + masm.emit_mov_r_m(ValueKind.V128, s1, vsph[-1].value); } - def load_v128_s_r(s: X86_64Xmmr, r: X86_64Gpr) { - asm.movdqu_s_m(s, vsph[-2].value); - asm.movd_r_m(r, vsph[-1].value); + def load_v128_s_r(s: Reg, r: Reg) { + masm.emit_mov_r_m(ValueKind.V128, s, vsph[-2].value); + masm.emit_mov_r_m(ValueKind.I32, r, vsph[-1].value); } // vsp[-2] -> xmm0, vsp[-1] -> xmm1 def load_v128_xmm0_xmm1(){ - load_v128_s_s(r_xmm0, r_xmm1); + load_v128_s_s(xenv.xmm0, xenv.xmm1); } // vsp[-2] -> xmm1, vsp[-1] -> xmm0 def load_v128_xmm1_xmm0(){ - load_v128_s_s(r_xmm1, r_xmm0); + load_v128_s_s(xenv.xmm1, xenv.xmm0); } def load_v128_xmm0_tmp0(){ - load_v128_s_r(r_xmm0, r_tmp0); + load_v128_s_r(xenv.xmm0, xenv.tmp0); } def load_imm8(r: X86_64Gpr) { asm.movbzx_r_m(r, ip_ptr); @@ -3045,7 +3040,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm_mov_m_r: (X86_64Addr, X86_64Gpr) -> T, signExt: bool) { bindHandler(opcode); - def dst: X86_64Addr = vsph[-1].value; + def dst: X86_64Addr = A(vsph[-1].value); // XXX skipped def idx: X86_64Gpr = r_tmp0; def src: X86_64Gpr = r_tmp1; // load imm (one byte) @@ -3067,8 +3062,8 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm_mov_r_m: (X86_64Gpr, X86_64Addr) -> T, asm_mov_m_r: (X86_64Addr, X86_64Gpr) -> T) { bindHandler(opcode); - def dst: X86_64Addr = vsph[-2].value; - def src: X86_64Addr = vsph[-1].value; + def dst: X86_64Addr = A(vsph[-2].value); // XXX skipped 
+ def src: X86_64Addr = A(vsph[-1].value); // XXX skipped def idx: X86_64Gpr = r_tmp0; def val: X86_64Gpr = r_tmp1; def addr: X86_64Gpr = r_tmp2; @@ -3083,11 +3078,11 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { load_memarg: (X86_64Gpr, X86_64Addr, X86_64Gpr) -> void, asm_mov_m_r: (X86_64Addr, X86_64Gpr) -> T) { bindHandler(opcode); - def dst: X86_64Addr = vsph[-1].value; + def dst: X86_64Addr = A(vsph[-1].value); def idx: X86_64Gpr = r_tmp0; def val: X86_64Gpr = r_tmp1; def addr: X86_64Gpr = r_tmp2; - load_memarg(val, vsph[-2].value, addr); // should load memarg first per the bytecode order + load_memarg(val, A(vsph[-2].value), addr); // should load memarg first per the bytecode order load_imm8(idx); // then load imm asm.q.lea(addr, dst); // load address asm_mov_m_r(X86_64Addr.new(addr, idx, size, 0), val); // store (replace) value @@ -3098,10 +3093,10 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { load_memarg: (X86_64Gpr, X86_64Addr, X86_64Gpr) -> void, asm_insert_s_r_i: (X86_64Xmmr, X86_64Gpr, u8) -> T) { bindHandler(opcode); - load_memarg(r_tmp0, vsph[-1].value, r_tmp1); // load memarg to tmp0 + load_memarg(r_tmp0, A(vsph[-1].value), r_tmp1); // load memarg to tmp0 masm.emit_v128_zero(r_xmm0); // zero out xmm0 asm_insert_s_r_i(r_xmm0, r_tmp0, 0); // insert value to lowest bits - asm.movdqu_m_s(vsph[-1].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.V128, vsph[-1].value, xenv.xmm0); if (valuerep.tagged) genTagUpdate(BpTypeCode.V128.code); endHandler(); } @@ -3111,20 +3106,20 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { def dst = r_xmm0; def tmp1 = r_tmp0; def tmp2 = r_tmp1; - def src = decode_memarg(vsph[-1].value, tmp1, tmp2); + def src = decode_memarg(A(vsph[-1].value), tmp1, tmp2); asm_extend_s_m(dst, src); if (valuerep.tagged) genTagUpdate(BpTypeCode.V128.code); - asm.movdqu_m_s(vsph[-1].value, dst); + asm.movdqu_m_s(A(vsph[-1].value), dst); endHandler(); } def 
genStoreLane(opcode: Opcode, size: byte, asm_mov_r_m: (X86_64Gpr, X86_64Addr) -> T, asm_mov_m_r: (X86_64Addr, X86_64Gpr) -> T) { bindHandler(opcode); - def data: X86_64Addr = vsph[-1].value; + def data: X86_64Addr = A(vsph[-1].value); def idx: X86_64Gpr = r_tmp0; def val: X86_64Gpr = r_tmp1; - def mem_addr = decode_memarg(vsph[-2].value, idx, val); + def mem_addr = decode_memarg(A(vsph[-2].value), idx, val); load_imm8(idx); asm.q.lea(val, data); // load address of the vector, val as a temp variable asm_mov_r_m(val, X86_64Addr.new(val, idx, size, 0)); // extract the lane from the value stack @@ -3136,9 +3131,9 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm_mov_r_m: (X86_64Gpr, X86_64Addr) -> T, masm_emit: (X86_64Xmmr, X86_64Gpr) -> void) { bindHandler(opcode); - asm_mov_r_m(r_tmp0, vsph[-1].value); + asm_mov_r_m(r_tmp0, A(vsph[-1].value)); masm_emit(r_xmm0, r_tmp0); - asm.movdqu_m_s(vsph[-1].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.V128, vsph[-1].value, xenv.xmm0); genTagUpdate(BpTypeCode.V128.code); endHandler(); } @@ -3146,9 +3141,9 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { load_memarg: (X86_64Gpr, X86_64Addr, X86_64Gpr) -> void, masm_emit: (X86_64Xmmr, X86_64Gpr) -> void) { bindHandler(opcode); - load_memarg(r_tmp0, vsph[-1].value, r_tmp1); + load_memarg(r_tmp0, A(vsph[-1].value), r_tmp1); masm_emit(r_xmm0, r_tmp0); - asm.movdqu_m_s(vsph[-1].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.V128, vsph[-1].value, xenv.xmm0); genTagUpdate(BpTypeCode.V128.code); endHandler(); } @@ -3157,7 +3152,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { bindHandler(Opcode.V128_CONST); { asm.movdqu_s_m(r_xmm0, ip_ptr); asm.q.add_r_i(r_ip, 16); - asm.movdqu_m_s(vsph[0].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.V128, vsph[0].value, xenv.xmm0); genTagPush(BpTypeCode.V128.code); incrementVsp(); endHandler(); @@ -3172,9 +3167,9 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: 
DataWriter) { // V128 load bindHandler(Opcode.V128_LOAD); { computeCurIpForTrap(-1); - load_memarg128(r_xmm0, vsph[-1].value, r_tmp0, r_tmp1); + load_memarg128(r_xmm0, A(vsph[-1].value), r_tmp0, r_tmp1); if (valuerep.tagged) genTagUpdate(BpTypeCode.V128.code); // update tag if necessary - asm.movdqu_m_s(vsph[-1].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.V128, vsph[-1].value, xenv.xmm0); endHandler(); } // V128 load_lane @@ -3207,8 +3202,8 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { // V128 store bindHandler(Opcode.V128_STORE); { computeCurIpForTrap(-1); - asm.movdqu_s_m(r_xmm0, vsph[-1].value); - store_memarg128(vsph[-2].value, r_xmm0, r_tmp0, r_tmp1); + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm0, vsph[-1].value); + store_memarg128(A(vsph[-2].value), r_xmm0, r_tmp0, r_tmp1); adjustVsp(-2); endHandler(); } @@ -3243,11 +3238,11 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { // Use the bits in the control mask c to select the corresponding bit // from v1 when 1 and v2 when 0. 
// This operation is equivalent to v128.or(v128.and(v1, c), v128.and(v2, v128.not(c))) - asm.movdqu_s_m(r_xmm0, vsph[-3].value); // v1 - asm.movdqu_s_m(r_xmm1, vsph[-2].value); // v2 - asm.movdqu_s_m(r_xmm2, vsph[-1].value); // c + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm0, vsph[-3].value); // v1 + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm1, vsph[-2].value); // v2 + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm2, vsph[-1].value); // c masm.emit_v128_bitselect(r_xmm0, r_xmm1, r_xmm2, r_xmm3); - asm.movdqu_m_s(vsph[-3].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.V128, vsph[-3].value, xenv.xmm0); adjustVsp(-2); endHandler(); } @@ -3271,7 +3266,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { bindHandler(t.0); load_v128_xmm0_tmp0(); t.1(r_xmm0, r_tmp0, r_tmp1, r_xmm1, r_xmm2); - asm.movdqu_m_s(vsph[-2].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.V128, vsph[-2].value, xenv.xmm0); decrementVsp(); endHandler(); } @@ -3288,7 +3283,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { bindHandler(t.0); load_v128_xmm0_tmp0(); masm.emit_v128_shift(r_xmm0, r_tmp0, byte.view(t.2), r_tmp1, r_xmm1, t.1); - asm.movdqu_m_s(vsph[-2].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.V128, vsph[-2].value, xenv.xmm0); decrementVsp(); endHandler(); } @@ -3467,7 +3462,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { bindHandler(Opcode.I8X16_SHUFFLE); { var RHS = X86_64Label.new(), LOOP_PRO = X86_64Label.new(), LOOP_EPI = X86_64Label.new(); incrementVsp(); // make room for a local variable dst (the result) - def dst = vsph[-1].value; + def dst = A(vsph[-1].value); def dst_addr = r_tmp3; asm.q.lea(dst_addr, dst); def idx = r_tmp0; // idx's bound is [0, 31] @@ -3475,8 +3470,8 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { def i = r_tmp2; // loop counter def SIMD_128_SIZE: byte = 16; // vectors from the value stack - def lhs = vsph[-3].value; - def rhs = vsph[-2].value; + def lhs = 
A(vsph[-3].value); + def rhs = A(vsph[-2].value); // for (i: byte < 16) asm.movq_r_i(i, 0); // LOOP_PROLOGUE: @@ -3494,8 +3489,8 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.q.cmp_r_i(i, SIMD_128_SIZE); // loop when 0 <= i < 16 asm.jc_rel_near(C.L, LOOP_PRO); // Return - asm.movdqu_s_m(r_xmm0, vsph[-1].value); - asm.movdqu_m_s(vsph[-3].value, r_xmm0); + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm0, vsph[-1].value); + masm.emit_mov_m_r(ValueKind.V128, vsph[-3].value, xenv.xmm0); adjustVsp(-2); endHandler(); // RHS: @@ -3509,7 +3504,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { bindHandler(Opcode.I8X16_RELAXED_SWIZZLE); { // XXX: faster swizzle available? load_v128_xmm0_xmm1(); masm.emit_i8x16_swizzle(r_xmm0, r_xmm1, r_tmp0, r_xmm2); - asm.movdqu_m_s(vsph[-2].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.V128, vsph[-2].value, xenv.xmm0); decrementVsp(); endHandler(); } @@ -3521,23 +3516,23 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { // Relaxed dot products. 
genSimdBinopCommute(Opcode.I16X8_RELAXED_DOT_I8X16_I7X16_S, asm.pmaddubsw_s_s); bindHandler(Opcode.I32X4_RELAXED_DOT_I8X16_I7X16_ADD_S); { - asm.movdqu_s_m(r_xmm0, vsph[-3].value); // a - asm.movdqu_s_m(r_xmm1, vsph[-2].value); // b - asm.movdqu_s_m(r_xmm2, vsph[-1].value); // c + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm0, vsph[-3].value); // a + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm1, vsph[-2].value); // b + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm2, vsph[-1].value); // c asm.pmaddubsw_s_s(r_xmm1, r_xmm0); masm.load_v128_mask(r_xmm0, masm.mask_i16x8_splat_0x0001, r_tmp0); asm.pmaddwd_s_s(r_xmm0, r_xmm1); asm.paddd_s_s(r_xmm2, r_xmm0); - asm.movdqu_m_s(vsph[-3].value, r_xmm2); + masm.emit_mov_m_r(ValueKind.V128, vsph[-3].value, xenv.xmm2); adjustVsp(-2); endHandler(); } } def genSimdMultiplyAdd(op: Opcode, is64: bool, isNeg: bool) { bindHandler(op); - asm.movdqu_s_m(r_xmm0, vsph[-3].value); // a - asm.movdqu_s_m(r_xmm1, vsph[-2].value); // b - asm.movdqu_s_m(r_xmm2, vsph[-1].value); // c + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm0, vsph[-3].value); // a + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm1, vsph[-2].value); // b + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm2, vsph[-1].value); // c if (is64) { asm.mulpd_s_s(r_xmm0, r_xmm1); if (isNeg) asm.subpd_s_s(r_xmm2, r_xmm0); @@ -3547,7 +3542,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { if (isNeg) asm.subps_s_s(r_xmm2, r_xmm0); else asm.addps_s_s(r_xmm2, r_xmm0); } - asm.movdqu_m_s(vsph[-3].value, r_xmm2); + masm.emit_mov_m_r(ValueKind.V128, vsph[-3].value, xenv.xmm2); adjustVsp(-2); endHandler(); } @@ -3556,27 +3551,27 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { } def genSimdBinops(opcodes: Array, f: (X86_64Xmmr, X86_64Xmmr) -> T) { for (opcode in opcodes) bindHandler(opcode); - asm.movdqu_s_m(r_xmm0, vsph[-2].value); - asm.movdqu_s_m(r_xmm1, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm0, vsph[-2].value); + 
masm.emit_mov_r_m(ValueKind.V128, xenv.xmm1, vsph[-1].value); f(r_xmm0, r_xmm1); - asm.movdqu_m_s(vsph[-2].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.V128, vsph[-2].value, xenv.xmm0); decrementVsp(); endHandler(); } def genSimdBinopCommute(opcode: Opcode, f: (X86_64Xmmr, X86_64Xmmr) -> T) { bindHandler(opcode); - asm.movdqu_s_m(r_xmm1, vsph[-2].value); - asm.movdqu_s_m(r_xmm0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm1, vsph[-2].value); + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm0, vsph[-1].value); f(r_xmm0, r_xmm1); - asm.movdqu_m_s(vsph[-2].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.V128, vsph[-2].value, xenv.xmm0); decrementVsp(); endHandler(); } def genSimdUnop_xx_x(opcode: Opcode, f: (X86_64Xmmr, X86_64Xmmr) -> T) { bindHandler(opcode); - asm.movdqu_s_m(r_xmm0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm0, vsph[-1].value); f(r_xmm0, r_xmm0); - asm.movdqu_m_s(vsph[-1].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.V128, vsph[-1].value, xenv.xmm0); endHandler(); } def genSimdUnop_x_x(opcode: Opcode, f: (X86_64Xmmr) -> T) { @@ -3584,31 +3579,31 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { } def genSimdUnops_x_x(opcodes: Array, f: (X86_64Xmmr) -> T) { for (opcode in opcodes) bindHandler(opcode); - asm.movdqu_s_m(r_xmm0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm0, vsph[-1].value); f(r_xmm0); - asm.movdqu_m_s(vsph[-1].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.V128, vsph[-1].value, xenv.xmm0); endHandler(); } def genSimdUnop_xxtmp_x(opcode: Opcode, f: (X86_64Xmmr, X86_64Xmmr) -> T) { bindHandler(opcode); - asm.movdqu_s_m(r_xmm0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm0, vsph[-1].value); f(r_xmm0, r_xmm1); - asm.movdqu_m_s(vsph[-1].value, r_xmm0); + masm.emit_mov_m_r(ValueKind.V128, vsph[-1].value, xenv.xmm0); endHandler(); } def genSimdUnop_x_r(opcode: Opcode, f: (X86_64Gpr, X86_64Xmmr) -> T) { bindHandler(opcode); - asm.movdqu_s_m(r_xmm0, 
vsph[-1].value); + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm0, vsph[-1].value); f(r_tmp0, r_xmm0); - asm.movd_m_r(vsph[-1].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I32, vsph[-1].value, xenv.tmp0); genTagUpdate(BpTypeCode.I32.code); endHandler(); } def genSimdUnop_xxtmp_r(opcode: Opcode, f: (X86_64Gpr, X86_64Xmmr, X86_64Xmmr) -> T) { bindHandler(opcode); - asm.movdqu_s_m(r_xmm0, vsph[-1].value); + masm.emit_mov_r_m(ValueKind.V128, xenv.xmm0, vsph[-1].value); f(r_tmp0, r_xmm0, r_xmm1); - asm.movd_m_r(vsph[-1].value, r_tmp0); + masm.emit_mov_m_r(ValueKind.I32, vsph[-1].value, xenv.tmp0); genTagUpdate(BpTypeCode.I32.code); endHandler(); } @@ -3700,13 +3695,13 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { stk[i] = 1; // mark as done } def genTagUpdate(tag: byte) { - if (valuerep.tagged) asm.movq_m_i(vsph[-1].tag, tag); + if (valuerep.tagged) masm.emit_mov_m_l(vsph[-1].tag, tag); } def genTagPush(tag: byte) { - if (valuerep.tagged) asm.movq_m_i(vsph[0].tag, i7.view(tag)); + if (valuerep.tagged) masm.emit_mov_m_l(vsph[0].tag, i7.view(tag)); } - def genTagPushR(r: X86_64Gpr) { - if (valuerep.tagged) asm.movq_m_r(vsph[0].tag, r); + def genTagPushR(r: Reg) { + if (valuerep.tagged) masm.emit_mov_m_r(ValueKind.I64, vsph[0].tag, r); } def genCopySlot(dst: X86_64Addr, src: X86_64Addr) { match (valuerep.slot_size) { @@ -3761,12 +3756,12 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.jc_rel_near(C.NZ, has_index); } genReadUleb32(r_tmp0); // decode offset - asm.movd_r_m(r_tmp1, vsph[-1].value); // read index off value stack + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp1, vsph[-1].value); // read index off value stack asm.q.add_r_r(r_tmp0, r_tmp1); // add index + offset gen(r_tmp1, r_mem0_base.plusR(r_tmp0, 1, 0)); asm.bind(finish); if (valuerep.tagged && tag != BpTypeCode.I32.code) genTagUpdate(tag); // update tag if necessary - asm.movq_m_r(vsph[-1].value, r_tmp1); + masm.emit_mov_m_r(ValueKind.I64, vsph[-1].value, 
xenv.tmp1); endHandler(); if (has_index != null) { asm.bind(has_index); @@ -3776,7 +3771,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.movq_r_m(memN, memN.plusR(r_tmp0, 8, offsets.Array_contents)); asm.movq_r_m(memN, memN.plus(offsets.NativeWasmMemory_start)); genReadUleb32(r_tmp0); // decode offset - asm.movd_r_m(r_tmp1, vsph[-1].value); // read index off value stack + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp1, vsph[-1].value); // read index off value stack asm.q.add_r_r(r_tmp0, r_tmp1); // add index + offset gen(r_tmp1, memN.plusR(r_tmp0, 1, 0)); asm.jmp_rel_near(finish); @@ -3785,7 +3780,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.bind(index64); // TODO: multi-memory with memory64 masm.emit_read_uleb(r_tmp3, r_ip, r_tmp1, r_tmp0); // decode offset - asm.movq_r_m(r_tmp1, vsph[-1].value); // read index off value stack + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp1, vsph[-1].value); // read index off value stack asm.movq_r_r(r_tmp2, r_tmp1); asm.q.or_r_r(r_tmp2, r_tmp3); // check that neither offset or index have asm.q.shr_r_i(r_tmp2, 34); // upper bits set @@ -3813,9 +3808,9 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.jc_rel_near(C.NZ, has_index); } genReadUleb32(r_tmp0); // decode offset - asm.movd_r_m(r_tmp1, vsph[-2].value); // read index + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp1, vsph[-2].value); // read index asm.q.add_r_r(r_tmp0, r_tmp1); // add index + offset - asm.movq_r_m(r_tmp1, vsph[-1].value); // read value + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp1, vsph[-1].value); // read value gen(r_mem0_base.plusR(r_tmp0, 1, 0), r_tmp1); asm.bind(finish); adjustVsp(-2); @@ -3828,9 +3823,9 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.movq_r_m(memN, memN.plusR(r_tmp0, 8, offsets.Array_contents)); asm.movq_r_m(memN, memN.plus(offsets.NativeWasmMemory_start)); genReadUleb32(r_tmp0); // decode offset - asm.movd_r_m(r_tmp1, 
vsph[-2].value); // read index off value stack + masm.emit_mov_r_m(ValueKind.I32, xenv.tmp1, vsph[-2].value); // read index off value stack asm.q.add_r_r(r_tmp0, r_tmp1); // add index + offset - asm.movq_r_m(r_tmp1, vsph[-1].value); // read value + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp1, vsph[-1].value); // read value gen(memN.plusR(r_tmp0, 1, 0), r_tmp1); asm.jmp_rel_near(finish); } @@ -3838,14 +3833,14 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.bind(index64); // TODO: multi-memory with memory64 masm.emit_read_uleb(r_tmp3, r_ip, r_tmp1, r_tmp0); // decode offset - asm.movq_r_m(r_tmp1, vsph[-2].value); // read index off value stack + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp1, vsph[-2].value); // read index off value stack asm.movq_r_r(r_tmp2, r_tmp1); asm.q.or_r_r(r_tmp2, r_tmp3); // check that neither offset or index have asm.q.shr_r_i(r_tmp2, 34); // upper bits set var label = newTrapLabel(TrapReason.MEMORY_OOB); asm.jc_rel_far(X86_64Conds.NZ, label); asm.q.add_r_r(r_tmp3, r_tmp1); // add index + offset - asm.movq_r_m(r_tmp1, vsph[-1].value); // read value + masm.emit_mov_r_m(ValueKind.I64, xenv.tmp1, vsph[-1].value); // read value gen(r_mem0_base.plusR(r_tmp3, 1, 0), r_tmp1); asm.jmp_rel_near(finish); } @@ -4215,16 +4210,16 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { fatal("ran out of buffer space"); return w; } - private def genPopInto(reg: X86_64Gpr) { + private def genPopInto(reg: Reg) { decrementVsp(); - asm.movq_r_m(reg, vsph[0].value); + masm.emit_mov_r_m(ValueKind.I64, reg, vsph[0].value); } private def genPopCont(reg: X86_64Xmmr) { decrementVsp(); - asm.movq_r_m(r_scratch, vsph[0].value); + masm.emit_mov_r_m(ValueKind.I64, xenv.scratch, vsph[0].value); asm.pinsrq_s_r_i(reg, r_scratch, 0); if (!FeatureDisable.unboxedConts) { - asm.movd_r_m(r_scratch, vsph[0].value.plus(8)); + masm.emit_mov_r_m(ValueKind.I32, xenv.scratch, vsph[0].value.plus(8)); asm.pinsrq_s_r_i(reg, r_scratch, 1); } } 
diff --git a/src/engine/x86-64/X86_64MacroAssembler.v3 b/src/engine/x86-64/X86_64MacroAssembler.v3 index 5325e353f..9de127d83 100644 --- a/src/engine/x86-64/X86_64MacroAssembler.v3 +++ b/src/engine/x86-64/X86_64MacroAssembler.v3 @@ -505,11 +505,36 @@ class X86_64MacroAssembler extends MacroAssembler { I32_AND => asm.d.and_r_m(G(reg), A(ma)); I32_OR => asm.d.or_r_m(G(reg), A(ma)); I32_XOR => asm.d.xor_r_m(G(reg), A(ma)); + I32_MUL => asm.d.imul_r_m(G(reg), A(ma)); I64_ADD => asm.q.add_r_m(G(reg), A(ma)); I64_SUB => asm.q.sub_r_m(G(reg), A(ma)); I64_AND => asm.q.and_r_m(G(reg), A(ma)); I64_OR => asm.q.or_r_m(G(reg), A(ma)); I64_XOR => asm.q.xor_r_m(G(reg), A(ma)); + I64_MUL => asm.q.imul_r_m(G(reg), A(ma)); + F32_ADD => asm.addss_s_m(X(reg), A(ma)); + F32_SUB => asm.subss_s_m(X(reg), A(ma)); + F32_MUL => asm.mulss_s_m(X(reg), A(ma)); + F32_DIV => asm.divss_s_m(X(reg), A(ma)); + F64_ADD => asm.addsd_s_m(X(reg), A(ma)); + F64_SUB => asm.subsd_s_m(X(reg), A(ma)); + F64_MUL => asm.mulsd_s_m(X(reg), A(ma)); + F64_DIV => asm.divsd_s_m(X(reg), A(ma)); + _ => unimplemented(); + } + } + def emit_binop_m_r(op: Opcode, ma: MasmAddr, reg: Reg) { + match (op) { + I32_ADD => asm.d.add_m_r(A(ma), G(reg)); + I32_SUB => asm.d.sub_m_r(A(ma), G(reg)); + I32_AND => asm.d.and_m_r(A(ma), G(reg)); + I32_OR => asm.d.or_m_r(A(ma), G(reg)); + I32_XOR => asm.d.xor_m_r(A(ma), G(reg)); + I64_ADD => asm.q.add_m_r(A(ma), G(reg)); + I64_SUB => asm.q.sub_m_r(A(ma), G(reg)); + I64_AND => asm.q.and_m_r(A(ma), G(reg)); + I64_OR => asm.q.or_m_r(A(ma), G(reg)); + I64_XOR => asm.q.xor_m_r(A(ma), G(reg)); _ => unimplemented(); } } @@ -528,6 +553,42 @@ class X86_64MacroAssembler extends MacroAssembler { _ => unimplemented(); } } + def emit_testd_m_i(addr: MasmAddr, i: int) { + asm.d.test_m_i(A(addr), i); + } + def emit_testq_m_i(addr: MasmAddr, i: int) { + asm.q.test_m_i(A(addr), i); + } + def emit_and_r_i(reg: Reg, i: int) { + asm.d.and_r_i(G(reg), i); + } + def emit_and_m_i(addr: MasmAddr, i: int) { + 
asm.d.and_m_i(A(addr), i); + } + def emit_xor_m_i(addr: MasmAddr, i: int) { + asm.d.and_m_i(A(addr), i); + } + def emit_popcntd_r_m(reg: Reg, addr: MasmAddr) { + asm.d.popcnt_r_m(G(reg), A(addr)); + } + def emit_popcntq_r_m(reg: Reg, addr: MasmAddr) { + asm.q.popcnt_r_m(G(reg), A(addr)); + } + def emit_sqrtf_r_m(reg: Reg, addr: MasmAddr) { + asm.sqrtss_s_m(X(reg), A(addr)); + } + def emit_sqrtd_r_m(reg: Reg, addr: MasmAddr) { + asm.sqrtsd_s_m(X(reg), A(addr)); + } + def emit_demote_r_m(reg: Reg, addr: MasmAddr) { + asm.cvtsd2ss_s_m(X(reg), A(addr)); + } + def emit_promote_r_m(reg: Reg, addr: MasmAddr) { + asm.cvtss2sd_s_m(X(reg), A(addr)); + } + def emit_cmpq_m_i(addr: MasmAddr, val: int) { + asm.q.cmp_m_i(A(addr), val); + } def emit_cmpq_r_i(cond: X86_64Cond, r1: X86_64Gpr, val: int) { asm.q.cmp_r_i(r1, val); asm.set_r(cond, r1); @@ -553,6 +614,36 @@ class X86_64MacroAssembler extends MacroAssembler { asm.set_r(cond, r1); asm.q.movbzx_r_r(r1, r1); } + def emit_cmpq_m_r(cond: X86_64Cond, addr: MasmAddr, reg: Reg) { + def r1 = G(reg); + asm.q.cmp_m_r(A(addr), r1); + asm.set_r(cond, r1); + asm.q.movbzx_r_r(r1, r1); + } + def emit_cmpd_m_r(cond: X86_64Cond, addr: MasmAddr, reg: Reg) { + def r1 = G(reg); + asm.d.cmp_m_r(A(addr), r1); + asm.set_r(cond, r1); + asm.d.movbzx_r_r(r1, r1); + } + // TODO migrate into above later based on uses (as br_r) + def emit_cmpd_m_i(addr: MasmAddr, val: int) { + asm.d.cmp_m_i(A(addr), val); + } + def emit_cmpf_r_m(reg: Reg, addr: MasmAddr) { + // TODO migrate more in here? + asm.ucomiss_s_m(X(reg), A(addr)); + } + def emit_cmpd_r_m(reg: Reg, addr: MasmAddr) { + // TODO migrate more in here? 
+ asm.ucomisd_s_m(X(reg), A(addr)); + } + def emit_roundf_r_m(reg: Reg, addr: MasmAddr, rounding: X86_64Rounding) { + asm.roundss_s_m(X(reg), A(addr), rounding); + } + def emit_roundd_r_m(reg: Reg, addr: MasmAddr, rounding: X86_64Rounding) { + asm.roundsd_s_m(X(reg), A(addr), rounding); + } def emit_pop_r(kind: ValueKind, reg: Reg) { match (kind) { I32 => asm.d.popq_r(G(reg)); @@ -801,6 +892,9 @@ class X86_64MacroAssembler extends MacroAssembler { def emit_jump_r(reg: Reg) { asm.ijmp_r(G(reg)); } + def emit_xchgq_m_r(addr: MasmAddr, reg: Reg) { + asm.xchgq_m_r(A(addr), G(reg)); + } def emit_increment_CountProbe(tmp: Reg, probe: CountProbe, increment: u64) { var r1 = G(tmp); var refOffset = asm.movq_r_p(r1, Pointer.atObject(probe) - Pointer.NULL); @@ -990,11 +1084,24 @@ class X86_64MacroAssembler extends MacroAssembler { asm.d.sub_r_r(scratch, r); asm.movd_r_r(r, scratch); // XXX: can save an instruction here? } + def emit_i32_clz_r_m(r: Reg, m: MasmAddr) { + asm.movd_r_i(scratch, -1); + asm.d.bsr_r_m(G(r), A(m)); + asm.d.cmov_r(C.Z, G(r), scratch); + asm.movd_r_i(scratch, 31); + asm.d.sub_r_r(scratch, G(r)); + asm.movd_m_r(A(m), scratch); + } def emit_i32_ctz_r_r(r: X86_64Gpr, s: X86_64Gpr) { asm.d.bsf_r_r(r, s); asm.movd_r_i(scratch, 32); asm.d.cmov_r(C.Z, r, scratch); } + def emit_i32_ctz_r_m(r: Reg, m: MasmAddr) { + asm.d.bsf_r_m(G(r), A(m)); + asm.movd_r_i(scratch, 32); + asm.d.cmov_r(C.Z, G(r), scratch); + } def emit_i64_clz_r_r(r: X86_64Gpr, s: X86_64Gpr) { asm.movq_r_i(scratch, -1); asm.q.bsr_r_r(r, s); @@ -1003,11 +1110,24 @@ class X86_64MacroAssembler extends MacroAssembler { asm.q.sub_r_r(scratch, r); asm.movq_r_r(r, scratch); // XXX: can save an instruction with second output reg } + def emit_i64_clz_r_m(r: Reg, m: MasmAddr) { + asm.movq_r_i(scratch, -1); + asm.q.bsr_r_m(G(r), A(m)); + asm.q.cmov_r(C.Z, G(r), scratch); + asm.movq_r_i(scratch, 63); + asm.q.sub_r_r(scratch, G(r)); + asm.movq_m_r(A(m), scratch); + } def emit_i64_ctz_r_r(r: X86_64Gpr, s: 
X86_64Gpr) { asm.q.bsf_r_r(r, s); asm.movq_r_i(scratch, 64); asm.q.cmov_r(C.Z, r, scratch); } + def emit_i64_ctz_r_m(r: Reg, m: MasmAddr) { + asm.q.bsf_r_m(G(r), A(m)); + asm.movq_r_i(scratch, 64); + asm.q.cmov_r(C.Z, G(r), scratch); + } def emit_i64_extend_i32_s(r: X86_64Gpr) { asm.q.shl_r_i(r, 32); asm.q.sar_r_i(r, 32); @@ -1015,6 +1135,18 @@ class X86_64MacroAssembler extends MacroAssembler { def emit_i64_extend_i32_u(r: X86_64Gpr) { asm.movd_r_r(r, r); } + def emit_i32_extend8_s_r_m(reg: Reg, addr: MasmAddr) { + asm.d.movbsx_r_m(G(reg), A(addr)); + } + def emit_i32_extend16_s_r_m(reg: Reg, addr: MasmAddr) { + asm.d.movwsx_r_m(G(reg), A(addr)); + } + def emit_i64_extend8_s_r_m(reg: Reg, addr: MasmAddr) { + asm.q.movbsx_r_m(G(reg), A(addr)); + } + def emit_i64_extend16_s_r_m(reg: Reg, addr: MasmAddr) { + asm.q.movwsx_r_m(G(reg), A(addr)); + } // SSE assemblers and helpers // Masks for simd instructions def mask_i8x16_splat_0x0f: (u64, u64) = (0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F); From db2c66a5ef3305b34fab264841566f47464f5bfe Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Fri, 30 Jan 2026 11:02:28 -0500 Subject: [PATCH 2/2] Fix failing tests in VspHelper update --- src/engine/x86-64/X86_64Interpreter.v3 | 5 ++--- src/engine/x86-64/X86_64MacroAssembler.v3 | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/engine/x86-64/X86_64Interpreter.v3 b/src/engine/x86-64/X86_64Interpreter.v3 index 2980390c7..bb6e636b8 100644 --- a/src/engine/x86-64/X86_64Interpreter.v3 +++ b/src/engine/x86-64/X86_64Interpreter.v3 @@ -1107,7 +1107,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { // BR_ON_NULL: check condition and either fall thru to next bytecode or ctl xfer (with stack copying) bindHandler(Opcode.BR_ON_NULL); - masm.emit_br_m(vsph[-1].value, MasmBrCond.REF_NULL, controlFallThruLabel); + masm.emit_br_m(vsph[-1].value, MasmBrCond.REF_NONNULL, controlFallThruLabel); decrementVsp(); masm.emit_br(controlTransferLabel); @@ 
-1754,7 +1754,7 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { } bindHandler(Opcode.I64_MUL); { masm.emit_mov_r_m(ValueKind.I64, xenv.tmp0, vsph[-1].value); - masm.emit_binop_r_m(Opcode.I32_MUL, xenv.tmp0, vsph[-2].value); + masm.emit_binop_r_m(Opcode.I64_MUL, xenv.tmp0, vsph[-2].value); masm.emit_mov_m_r(ValueKind.I64, vsph[-2].value, xenv.tmp0); decrementVsp(); endHandler(); @@ -2159,7 +2159,6 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { masm.emit_and_r_i(xenv.tmp0, 0x7FFFFFFF); masm.emit_mov_r_m(ValueKind.I32, xenv.tmp1, vsph[-1].upper); masm.emit_and_r_i(xenv.tmp1, 0x80000000); - masm.emit_mov_r_m(ValueKind.I32, xenv.tmp1, vsph[-1].value); asm.d.or_r_r(r_tmp0, r_tmp1); masm.emit_mov_m_r(ValueKind.I32, vsph[-2].upper, xenv.tmp0); decrementVsp(); diff --git a/src/engine/x86-64/X86_64MacroAssembler.v3 b/src/engine/x86-64/X86_64MacroAssembler.v3 index 9de127d83..30d787d90 100644 --- a/src/engine/x86-64/X86_64MacroAssembler.v3 +++ b/src/engine/x86-64/X86_64MacroAssembler.v3 @@ -566,7 +566,7 @@ class X86_64MacroAssembler extends MacroAssembler { asm.d.and_m_i(A(addr), i); } def emit_xor_m_i(addr: MasmAddr, i: int) { - asm.d.and_m_i(A(addr), i); + asm.d.xor_m_i(A(addr), i); } def emit_popcntd_r_m(reg: Reg, addr: MasmAddr) { asm.d.popcnt_r_m(G(reg), A(addr));