From f6313356828ee72ce7c8b941addd4e9d4e229a53 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev Date: Thu, 23 Apr 2026 22:19:23 +0900 Subject: [PATCH 01/10] [mlir][dxsa] Add BinaryWriter to translate from MLIR to DXSA BinaryWriter translates from an MLIR module in DXSA dialect into a DXSA binary. It is a reverse of what BinaryParser does. Current implementation only supports standard instructions, and needs to be extended to support custom instructions. Instruction table is moved into a separate file (InstrInfo.def), so it can be shared between Parser/Writer, which build different data structures from it. Parser goes from opcodes to mnemonics, and Writer is reversed. Tests are extended to run MLIR in roundtrip to verify both the Parser and Writer. We also compare binary output with input to make sure that we do not lose any data during translation. --- mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td | 2 +- mlir/lib/Target/DXSA/BinaryParser.cpp | 316 +---------------- mlir/lib/Target/DXSA/BinaryWriter.cpp | 355 ++++++++++++++++++- mlir/lib/Target/DXSA/InstrInfo.def | 312 ++++++++++++++++ mlir/test/Target/DXSA/empty.mlir | 4 + mlir/test/Target/DXSA/mov-index.mlir | 15 + mlir/test/Target/DXSA/mov.mlir | 4 + mlir/test/Target/DXSA/ret.mlir | 3 + mlir/test/Target/DXSA/udiv.mlir | 13 + 9 files changed, 706 insertions(+), 318 deletions(-) create mode 100644 mlir/lib/Target/DXSA/InstrInfo.def diff --git a/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td b/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td index 5039217ee07b..b10ed675bf43 100644 --- a/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td +++ b/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td @@ -128,7 +128,7 @@ def DXSA_IndexRelImm : DXSA_Op<"index.rel.imm"> { TODO }]; - let arguments = (ins DXSA_OperandType:$operand, StrAttr:$op, I64Attr:$imm); + let arguments = (ins DXSA_OperandType:$operand, StrAttr:$op, I32Attr:$imm); let results = (outs DXSA_IndexType:$index); let assemblyFormat = "$operand attr-dict"; } diff --git 
a/mlir/lib/Target/DXSA/BinaryParser.cpp b/mlir/lib/Target/DXSA/BinaryParser.cpp index 2ac6629e6d75..ed6480fdb30b 100644 --- a/mlir/lib/Target/DXSA/BinaryParser.cpp +++ b/mlir/lib/Target/DXSA/BinaryParser.cpp @@ -52,320 +52,8 @@ struct InstructionInfo { static void initInstructionInfo(MutableArrayRef instructions) { #define SET(OpCode, Name, NumOperands, PrecMask, OpClass) \ instructions[OpCode] = InstructionInfo{NumOperands, Name, OpClass, PrecMask}; - // clang-format off - SET(D3D10_SB_OPCODE_ADD, "add", 3, 0x06, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_AND, "and", 3, 0x06, D3D10_SB_BIT_OP); - SET(D3D10_SB_OPCODE_BREAK, "break", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_BREAKC, "breakc", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_CALL, "call", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_CALLC, "callc", 2, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_CONTINUE, "continue", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_CONTINUEC, "continuec", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_CASE, "case", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_CUT, "cut", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_DEFAULT, "default", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_DISCARD, "discard", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_DIV, "div", 3, 0x06, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_DP2, "dp2", 3, 0x06, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_DP3, "dp3", 3, 0x06, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_DP4, "dp4", 3, 0x06, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_ELSE, "else", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_EMIT, "emit", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_EMITTHENCUT, "emit_then_cut", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_ENDIF, "endif", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_ENDLOOP, "endloop", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_ENDSWITCH, "endswitch", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_EQ, "eq", 3, 0x00, 
D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_EXP, "exp", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_FRC, "frc", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_FTOI, "ftoi", 2, 0x00, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_FTOU, "ftou", 2, 0x00, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_GE, "ge", 3, 0x00, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_DERIV_RTX, "deriv_rtx", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_DERIV_RTY, "deriv_rty", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_IADD, "iadd", 3, 0x06, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_IF, "if", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_IEQ, "ieq", 3, 0x00, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_IGE, "ige", 3, 0x00, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_ILT, "ilt", 3, 0x00, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_IMAD, "imad", 4, 0x0e, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_IMAX, "imax", 3, 0x06, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_IMIN, "imin", 3, 0x06, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_IMUL, "imul", 4, 0x0c, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_INE, "ine", 3, 0x00, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_INEG, "ineg", 2, 0x02, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_ISHL, "ishl", 3, 0x02, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_ISHR, "ishr", 3, 0x02, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_ITOF, "itof", 2, 0x00, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_LABEL, "label", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_LD, "ld", 3, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_LD_MS, "ldms", 4, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_LOG, "log", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_LOOP, "loop", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_LT, "lt", 3, 0x00, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_MAD, "mad", 4, 0x0e, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_MAX, "max", 3, 0x06, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_MIN, "min", 3, 0x06, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_MOV, "mov", 2, 0x02, D3D10_SB_FLOAT_OP); - 
SET(D3D10_SB_OPCODE_MOVC, "movc", 4, 0x0c, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_MUL, "mul", 3, 0x06, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_NE, "ne", 3, 0x00, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_NOP, "nop", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_NOT, "not", 2, 0x02, D3D10_SB_BIT_OP); - SET(D3D10_SB_OPCODE_OR, "or", 3, 0x06, D3D10_SB_BIT_OP); - SET(D3D10_SB_OPCODE_RESINFO, "resinfo", 3, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_RET, "ret", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_RETC, "retc", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_ROUND_NE, "round_ne", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_ROUND_NI, "round_ni", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_ROUND_PI, "round_pi", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_ROUND_Z, "round_z", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_RSQ, "rsq", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_SAMPLE, "sample", 4, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_SAMPLE_B, "sample_b", 5, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_SAMPLE_L, "sample_l", 5, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_SAMPLE_D, "sample_d", 6, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_SAMPLE_C, "sample_c", 5, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_SAMPLE_C_LZ, "sample_c_lz", 5, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_SQRT, "sqrt", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_SWITCH, "switch", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_SINCOS, "sincos", 3, 0x04, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_UDIV, "udiv", 4, 0x0c, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_ULT, "ult", 3, 0x00, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_UGE, "uge", 3, 0x00, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_UMAX, "umax", 3, 0x06, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_UMIN, "umin", 3, 0x06, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_UMUL, "umul", 4, 0x0c, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_UMAD, "umad", 4, 0x0e, D3D10_SB_UINT_OP); - 
SET(D3D10_SB_OPCODE_USHR, "ushr", 3, 0x02, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_UTOF, "utof", 2, 0x00, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_XOR, "xor", 3, 0x06, D3D10_SB_BIT_OP); - SET(D3D10_SB_OPCODE_RESERVED0, "jmp", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_DCL_INPUT, "dcl_input", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_OUTPUT, "dcl_output", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_INPUT_SGV, "dcl_input_sgv", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_INPUT_PS_SGV, "dcl_input_ps_sgv", 1, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE, "dcl_inputprimitive", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, "dcl_outputtopology", 0, - 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT, "dcl_maxout", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_INPUT_PS, "dcl_input_ps", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER, "dcl_constantbuffer", 1, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_SAMPLER, "dcl_sampler", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_RESOURCE, "dcl_resource", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_INPUT_SIV, "dcl_input_siv", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_INPUT_PS_SIV, "dcl_input_ps_siv", 1, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_OUTPUT_SIV, "dcl_output_siv", 1, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_OUTPUT_SGV, "dcl_output_sgv", 1, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_TEMPS, "dcl_temps", 0, 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP, "dcl_indexableTemp", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_INDEX_RANGE, "dcl_indexrange", 1, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS, "dcl_globalFlags", 0, 0x00, - D3D10_SB_DCL_OP); - - SET(D3D10_1_SB_OPCODE_SAMPLE_INFO, "sampleinfo", 2, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_1_SB_OPCODE_SAMPLE_POS, 
"samplepos", 3, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_1_SB_OPCODE_GATHER4, "gather4", 4, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_1_SB_OPCODE_LOD, "lod", 4, 0x00, D3D10_SB_TEX_OP); - - SET(D3D11_SB_OPCODE_EMIT_STREAM, "emit_stream", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D11_SB_OPCODE_CUT_STREAM, "cut_stream", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D11_SB_OPCODE_EMITTHENCUT_STREAM, "emit_then_cut_stream", 1, 0x00, - D3D10_SB_FLOW_OP); - SET(D3D11_SB_OPCODE_INTERFACE_CALL, "fcall", 1, 0x00, D3D10_SB_FLOW_OP); - - SET(D3D11_SB_OPCODE_DCL_STREAM, "dcl_stream", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_FUNCTION_BODY, "dcl_function_body", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_FUNCTION_TABLE, "dcl_function_table", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_INTERFACE, "dcl_interface", 0, 0x00, D3D10_SB_DCL_OP); - - SET(D3D11_SB_OPCODE_BUFINFO, "bufinfo", 2, 0x00, D3D10_SB_TEX_OP); - SET(D3D11_SB_OPCODE_DERIV_RTX_COARSE, "deriv_rtx_coarse", 2, 0x02, - D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_DERIV_RTX_FINE, "deriv_rtx_fine", 2, 0x02, - D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_DERIV_RTY_COARSE, "deriv_rty_coarse", 2, 0x02, - D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_DERIV_RTY_FINE, "deriv_rty_fine", 2, 0x02, - D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_GATHER4_C, "gather4_c", 5, 0x00, D3D10_SB_TEX_OP); - SET(D3D11_SB_OPCODE_GATHER4_PO, "gather4_po", 5, 0x00, D3D10_SB_TEX_OP); - SET(D3D11_SB_OPCODE_GATHER4_PO_C, "gather4_po_c", 6, 0x00, D3D10_SB_TEX_OP); - SET(D3D11_SB_OPCODE_RCP, "rcp", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_F32TOF16, "f32tof16", 2, 0x00, D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_F16TOF32, "f16tof32", 2, 0x00, D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_UADDC, "uaddc", 4, 0x0c, D3D10_SB_UINT_OP); - SET(D3D11_SB_OPCODE_USUBB, "usubb", 4, 0x0c, D3D10_SB_UINT_OP); - SET(D3D11_SB_OPCODE_COUNTBITS, "countbits", 2, 0x02, D3D10_SB_BIT_OP); - SET(D3D11_SB_OPCODE_FIRSTBIT_HI, "firstbit_hi", 2, 0x02, D3D10_SB_BIT_OP); - 
SET(D3D11_SB_OPCODE_FIRSTBIT_LO, "firstbit_lo", 2, 0x02, D3D10_SB_BIT_OP); - SET(D3D11_SB_OPCODE_FIRSTBIT_SHI, "firstbit_shi", 2, 0x02, D3D10_SB_BIT_OP); - SET(D3D11_SB_OPCODE_UBFE, "ubfe", 4, 0x02, D3D10_SB_BIT_OP); - SET(D3D11_SB_OPCODE_IBFE, "ibfe", 4, 0x02, D3D10_SB_BIT_OP); - SET(D3D11_SB_OPCODE_BFI, "bfi", 5, 0x02, D3D10_SB_BIT_OP); - SET(D3D11_SB_OPCODE_BFREV, "bfrev", 2, 0x02, D3D10_SB_BIT_OP); - SET(D3D11_SB_OPCODE_SWAPC, "swapc", 5, 0x02, D3D10_SB_FLOAT_OP); - - SET(D3D11_SB_OPCODE_HS_DECLS, "hs_decls", 0, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_HS_CONTROL_POINT_PHASE, "hs_control_point_phase", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_HS_FORK_PHASE, "hs_fork_phase", 0, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_HS_JOIN_PHASE, "hs_join_phase", 0, 0x00, D3D10_SB_DCL_OP); - - SET(D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT, - "dcl_input_control_point_count", 0, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT, - "dcl_output_control_point_count", 0, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_TESS_DOMAIN, "dcl_tessellator_domain", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_TESS_PARTITIONING, "dcl_tessellator_partitioning", 0, - 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE, - "dcl_tessellator_output_primitive", 0, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR, "dcl_hs_max_tessfactor", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT, - "dcl_hs_fork_phase_instance_count", 0, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, - "dcl_hs_join_phase_instance_count", 0, 0x00, D3D10_SB_DCL_OP); - - SET(D3D11_SB_OPCODE_DCL_THREAD_GROUP, "dcl_thread_group", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED, "dcl_uav_typed", 1, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW, "dcl_uav_raw", 1, 0x00, - D3D10_SB_DCL_OP); - 
SET(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED, - "dcl_uav_structured", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW, "dcl_tgsm_raw", 1, - 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED, - "dcl_tgsm_structured", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_RESOURCE_RAW, "dcl_resource_raw", 1, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED, "dcl_resource_structured", 1, - 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_LD_UAV_TYPED, "ld_uav_typed", 3, 0x00, D3D11_SB_MEM_OP); - SET(D3D11_SB_OPCODE_STORE_UAV_TYPED, "store_uav_typed", 3, 0x00, - D3D11_SB_MEM_OP); - SET(D3D11_SB_OPCODE_LD_RAW, "ld_raw", 3, 0x00, D3D11_SB_MEM_OP); - SET(D3D11_SB_OPCODE_STORE_RAW, "store_raw", 3, 0x00, D3D11_SB_MEM_OP); - SET(D3D11_SB_OPCODE_LD_STRUCTURED, "ld_structured", 4, 0x00, D3D11_SB_MEM_OP); - SET(D3D11_SB_OPCODE_STORE_STRUCTURED, "store_structured", 4, 0x00, - D3D11_SB_MEM_OP); - SET(D3D11_SB_OPCODE_ATOMIC_AND, "atomic_and", 3, 0x00, D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_ATOMIC_OR, "atomic_or", 3, 0x00, D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_ATOMIC_XOR, "atomic_xor", 3, 0x00, D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_ATOMIC_CMP_STORE, "atomic_cmp_store", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_ATOMIC_IADD, "atomic_iadd", 3, 0x00, D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_ATOMIC_IMAX, "atomic_imax", 3, 0x00, D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_ATOMIC_IMIN, "atomic_imin", 3, 0x00, D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_ATOMIC_UMAX, "atomic_umax", 3, 0x00, D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_ATOMIC_UMIN, "atomic_umin", 3, 0x00, D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_ALLOC, "imm_atomic_alloc", 2, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_CONSUME, "imm_atomic_consume", 2, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_IADD, "imm_atomic_iadd", 4, 0x00, - D3D11_SB_ATOMIC_OP); 
- SET(D3D11_SB_OPCODE_IMM_ATOMIC_AND, "imm_atomic_and", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_OR, "imm_atomic_or", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_XOR, "imm_atomic_xor", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_EXCH, "imm_atomic_exch", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_CMP_EXCH, "imm_atomic_cmp_exch", 5, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_IMAX, "imm_atomic_imax", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_IMIN, "imm_atomic_imin", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_UMAX, "imm_atomic_umax", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_UMIN, "imm_atomic_umin", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_SYNC, "sync", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D11_SB_OPCODE_EVAL_SNAPPED, "eval_snapped", 3, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_EVAL_SAMPLE_INDEX, "eval_sample_index", 3, 0x02, - D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_EVAL_CENTROID, "eval_centroid", 2, 0x02, - D3D10_SB_FLOAT_OP); - - SET(D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT, "dcl_gsinstances", 0, 0x00, - D3D10_SB_DCL_OP); - - SET(D3D11_SB_OPCODE_DADD, "dadd", 3, 0x06, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DMAX, "dmax", 3, 0x06, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DMIN, "dmin", 3, 0x06, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DMUL, "dmul", 3, 0x06, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DEQ, "deq", 3, 0x00, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DGE, "dge", 3, 0x00, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DLT, "dlt", 3, 0x00, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DNE, "dne", 3, 0x00, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DMOV, "dmov", 2, 0x02, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DMOVC, "dmovc", 4, 0x0c, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DTOF, "dtof", 2, 0x02, D3D11_SB_DOUBLE_TO_FLOAT_OP); - SET(D3D11_SB_OPCODE_FTOD, "ftod", 2, 
0x00, D3D11_SB_FLOAT_TO_DOUBLE_OP); - - SET(D3D11_SB_OPCODE_ABORT, "abort", 0, 0x00, D3D11_SB_DEBUG_OP); - SET(D3D11_SB_OPCODE_DEBUG_BREAK, "debug_break", 0, 0x00, D3D11_SB_DEBUG_OP); - - SET(D3D11_1_SB_OPCODE_DDIV, "ddiv", 3, 0x06, D3D11_SB_DOUBLE_OP); - SET(D3D11_1_SB_OPCODE_DFMA, "dfma", 4, 0x0e, D3D11_SB_DOUBLE_OP); - SET(D3D11_1_SB_OPCODE_DRCP, "drcp", 2, 0x02, D3D11_SB_DOUBLE_OP); - - SET(D3D11_1_SB_OPCODE_MSAD, "msad", 4, 0x0e, D3D10_SB_UINT_OP); - - SET(D3D11_1_SB_OPCODE_DTOI, "dtoi", 2, 0x00, D3D11_SB_DOUBLE_OP); - SET(D3D11_1_SB_OPCODE_DTOU, "dtou", 2, 0x00, D3D11_SB_DOUBLE_OP); - SET(D3D11_1_SB_OPCODE_ITOD, "itod", 2, 0x00, D3D10_SB_INT_OP); - SET(D3D11_1_SB_OPCODE_UTOD, "utod", 2, 0x00, D3D10_SB_UINT_OP); - - SET(D3DWDDM1_3_SB_OPCODE_GATHER4_FEEDBACK, "gather4_s", 5, 0x00, - D3D10_SB_TEX_OP); - SET(D3DWDDM1_3_SB_OPCODE_GATHER4_C_FEEDBACK, "gather4_c_s", 6, 0x00, - D3D10_SB_TEX_OP); - SET(D3DWDDM1_3_SB_OPCODE_GATHER4_PO_FEEDBACK, "gather4_po_s", 6, 0x00, - D3D10_SB_TEX_OP); - SET(D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK, "gather4_po_c_s", 7, 0x00, - D3D10_SB_TEX_OP); - SET(D3DWDDM1_3_SB_OPCODE_LD_FEEDBACK, "ld_s", 4, 0x00, D3D10_SB_TEX_OP); - SET(D3DWDDM1_3_SB_OPCODE_LD_MS_FEEDBACK, "ldms_s", 5, 0x00, D3D10_SB_TEX_OP); - SET(D3DWDDM1_3_SB_OPCODE_LD_UAV_TYPED_FEEDBACK, "ld_uav_typed_s", 4, 0x00, - D3D11_SB_MEM_OP); - SET(D3DWDDM1_3_SB_OPCODE_LD_RAW_FEEDBACK, "ld_raw_s", 4, 0x00, - D3D11_SB_MEM_OP); - SET(D3DWDDM1_3_SB_OPCODE_LD_STRUCTURED_FEEDBACK, "ld_structured_s", 5, 0x00, - D3D11_SB_MEM_OP); - SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_L_FEEDBACK, "sample_l_s", 6, 0x00, - D3D10_SB_TEX_OP); - SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK, "sample_c_lz_s", 6, 0x00, - D3D10_SB_TEX_OP); - SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_CLAMP_FEEDBACK, "sample_cl_s", 6, 0x00, - D3D10_SB_TEX_OP); - SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_B_CLAMP_FEEDBACK, "sample_b_cl_s", 7, 0x00, - D3D10_SB_TEX_OP); - SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_D_CLAMP_FEEDBACK, "sample_d_cl_s", 8, 0x00, - 
D3D10_SB_TEX_OP);
-  SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_C_CLAMP_FEEDBACK, "sample_c_cl_s", 7, 0x00,
-      D3D10_SB_TEX_OP);
-  SET(D3DWDDM1_3_SB_OPCODE_CHECK_ACCESS_FULLY_MAPPED,
-      "check_access_fully_mapped", 2, 0x00, D3D10_SB_TEX_OP);
-  // clang-format on
+#include "InstrInfo.def"
+#undef SET
 }
 
 struct InstructionModifier {
diff --git a/mlir/lib/Target/DXSA/BinaryWriter.cpp b/mlir/lib/Target/DXSA/BinaryWriter.cpp
index 4c19d41a7790..11759cac7ed0 100644
--- a/mlir/lib/Target/DXSA/BinaryWriter.cpp
+++ b/mlir/lib/Target/DXSA/BinaryWriter.cpp
@@ -17,10 +17,372 @@
 using namespace mlir;
 using namespace llvm;
 
+using OpcodeMap = llvm::DenseMap<StringRef, uint32_t>;
+
+// Populate the mnemonic -> opcode lookup table from the instruction
+// table in InstrInfo.def.
+static void initOpcodeMap(OpcodeMap &opcodes) {
+#define SET(OpCode, Name, NumOperands, PrecMask, OpClass)                      \
+  opcodes[Name] = OpCode;
+#include "InstrInfo.def"
+#undef SET
+}
+
+// Return the index representation that the operand token must carry
+// for an index defined by `op`. Emits an error and fails if `op` is not
+// an index operation or its immediate is not a 32 or 64 bit integer.
+static FailureOr<uint32_t> getIndexRepresentation(Operation *op) {
+  if (auto imm = dyn_cast<dxsa::IndexImm>(op)) {
+    auto attr = dyn_cast<IntegerAttr>(imm.getImm());
+    if (!attr) {
+      emitError(op->getLoc(), "invalid immediate index");
+      return failure();
+    }
+
+    if (attr.getType().isInteger(32)) {
+      return D3D10_SB_OPERAND_INDEX_IMMEDIATE32;
+    }
+
+    if (attr.getType().isInteger(64)) {
+      return D3D10_SB_OPERAND_INDEX_IMMEDIATE64;
+    }
+
+    emitError(op->getLoc(), "invalid immediate index type");
+    return failure();
+  }
+
+  if (isa<dxsa::IndexRel>(op)) {
+    return D3D10_SB_OPERAND_INDEX_RELATIVE;
+  }
+
+  if (isa<dxsa::IndexRelImm>(op)) {
+    return D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
+  }
+
+  emitError(op->getLoc(), "invalid index type");
+  return failure();
+}
+
+// Translates DXSA dialect operations into a little-endian token stream.
+class Writer {
+public:
+  Writer(raw_ostream &output) : output(output, endianness::little) {
+    initOpcodeMap(opcodeMap);
+  }
+
+  // Emit all instructions in the module body, which must consist of a
+  // single block. Other top-level operations are not emitted on their
+  // own; operands and indices are reached through their users.
+  LogicalResult emitModule(ModuleOp source) {
+    Region &region = source.getRegion();
+    if (!region.hasOneBlock()) {
+      emitError(region.getLoc(), "region should contain only one block");
+      return failure();
+    }
+
+    for (auto &op : region.front()) {
+      if (auto inst = dyn_cast<dxsa::Instruction>(op)) {
+        if (failed(emitInstruction(inst))) {
+          return failure();
+        }
+      }
+    }
+    return success();
+  }
+
+  // Emit an instruction and all its operands recursively.
+  // FIXME: add extended instructions
+  LogicalResult emitInstruction(dxsa::Instruction inst) {
+    // Buffer all tokens for an instruction, so we can fixup
+    // instruction length before emitting tokens to the output.
+    buffer.clear();
+
+    auto opcodeIt = opcodeMap.find(inst.getMnemonic());
+    if (opcodeIt == opcodeMap.end()) {
+      emitError(inst.getLoc(), "unknown mnemonic");
+      return failure();
+    }
+
+    // First token is an opcode and length. Length is unknown until we
+    // process all operands.
+    uint32_t opcode = opcodeIt->second;
+    uint32_t token = ENCODE_D3D10_SB_OPCODE_TYPE(opcode);
+    buffer.push_back(token);
+
+    for (Value value : inst.getOperands()) {
+      Operation *op = value.getDefiningOp();
+      if (!op) {
+        emitError(value.getLoc(), "undefined operand");
+        return failure();
+      }
+
+      if (auto operand = dyn_cast<dxsa::Operand>(*op)) {
+        if (failed(emitOperand(operand))) {
+          return failure();
+        }
+        continue;
+      }
+
+      if (auto operand = dyn_cast<dxsa::OperandImm>(*op)) {
+        if (failed(emitOperandImm(operand))) {
+          return failure();
+        }
+        continue;
+      }
+
+      emitError(op->getLoc(), "unexpected operand kind");
+      return failure();
+    }
+
+    // Fixup instruction length after all operands are accumulated in
+    // the buffer.
+    buffer[0] |= ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(buffer.size());
+    for (uint32_t token : buffer) {
+      output.write(token);
+    }
+
+    return success();
+  }
+
+  // Emit an operand and all its indices recursively.
+  LogicalResult emitOperand(dxsa::Operand op) {
+    uint32_t token = ENCODE_D3D10_SB_OPERAND_TYPE(op.getType());
+
+    // Encode swizzle, mask, or one component selection.
+    switch (op.getNumComponents()) {
+    case 0: {
+      token |=
+          ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_0_COMPONENT);
+      break;
+    }
+    case 1: {
+      token |=
+          ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_1_COMPONENT);
+      break;
+    }
+    case 4: {
+      token |=
+          ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_4_COMPONENT);
+      if (auto mask = op.getMask()) {
+        token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(
+            D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE);
+        token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_MASK(*mask);
+      } else if (auto swizzle = op.getSwizzle()) {
+        SmallVector<uint32_t> values;
+        for (APInt v : *swizzle) {
+          values.push_back(v.getZExtValue());
+        }
+        if (values.size() != 4) {
+          emitError(op.getLoc(), "invalid number of swizzle values");
+          return failure();
+        }
+        token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(
+            D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE);
+        token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(
+            values[0], values[1], values[2], values[3]);
+        break;
+      } else if (auto one = op.getOne()) {
+        token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(
+            D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE);
+        token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECT_1(*one);
+        break;
+      }
+      break;
+    }
+    default: {
+      emitError(op.getLoc(), "invalid number of components");
+      return failure();
+    }
+    }
+
+    // Operand token encodes types and number of indices that follow
+    // it.
+    token |= ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(op.getNumOperands());
+    uint32_t dim = 0;
+    for (Value value : op.getOperands()) {
+      Operation *index = value.getDefiningOp();
+      if (!index) {
+        emitError(value.getLoc(), "index must be defined");
+        return failure();
+      }
+
+      FailureOr<uint32_t> repr = getIndexRepresentation(index);
+      if (failed(repr)) {
+        return failure();
+      }
+      token |= ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(dim, *repr);
+      dim += 1;
+    }
+
+    buffer.push_back(token);
+
+    // Indices follow the operand token.
+    for (Value value : op.getOperands()) {
+      Operation *index = value.getDefiningOp();
+      if (!index) {
+        emitError(value.getLoc(), "index must be defined");
+        return failure();
+      }
+
+      if (auto indexImm = dyn_cast<dxsa::IndexImm>(*index)) {
+        if (failed(emitIndexImm(indexImm))) {
+          return failure();
+        }
+        continue;
+      }
+
+      if (auto indexRel = dyn_cast<dxsa::IndexRel>(*index)) {
+        if (failed(emitIndexRel(indexRel))) {
+          return failure();
+        }
+        continue;
+      }
+
+      if (auto indexRelImm = dyn_cast<dxsa::IndexRelImm>(*index)) {
+        if (failed(emitIndexRelImm(indexRelImm))) {
+          return failure();
+        }
+        continue;
+      }
+
+      emitError(value.getLoc(), "invalid index type");
+      return failure();
+    }
+
+    return success();
+  }
+
+  // Emit an immediate operand. Unlike register operands, immediate
+  // operands do not have indices. They are encoded as an operand
+  // followed by N immediate values for each component.
+  LogicalResult emitOperandImm(dxsa::OperandImm op) {
+    auto attr = dyn_cast<DenseIntElementsAttr>(op.getImm());
+    if (!attr) {
+      emitError(op.getLoc(), "invalid immediate operand");
+      return failure();
+    }
+
+    uint32_t token = 0;
+
+    Type elementType = attr.getType().getElementType();
+    if (elementType.isInteger(32)) {
+      token |= ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_IMMEDIATE32);
+    } else if (elementType.isInteger(64)) {
+      token |= ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_IMMEDIATE64);
+    } else {
+      emitError(op.getLoc(), "invalid immediate operand type");
+      return failure();
+    }
+
+    // Split immediates into tokens. 32 bit immediate values are
+    // encoded as is, and 64 bit immediates are split into high and
+    // low 32 bit parts.
+    SmallVector<uint32_t> values;
+    for (APInt v : attr) {
+      uint64_t bits = v.getZExtValue();
+      if (v.getBitWidth() == 64) {
+        values.push_back(bits >> 32);
+      }
+      values.push_back(bits);
+    }
+
+    if (values.size() == 1) {
+      token |=
+          ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_1_COMPONENT);
+    } else if (values.size() == 4) {
+      token |=
+          ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_4_COMPONENT);
+    } else {
+      emitError(op.getLoc(),
+                "immediate operand should be either 1- or 4- component");
+      return failure();
+    }
+
+    buffer.push_back(token);
+    for (uint32_t v : values) {
+      buffer.push_back(v);
+    }
+
+    return success();
+  }
+
+  // Emit an immediate index. Its type is encoded into the operand, so
+  // here we only emit the value as tokens.
+  LogicalResult emitIndexImm(dxsa::IndexImm op) {
+    auto attr = dyn_cast<IntegerAttr>(op.getImm());
+    if (!attr) {
+      emitError(op.getLoc(), "invalid immediate index");
+      return failure();
+    }
+
+    uint64_t value = attr.getInt();
+    if (attr.getType().isInteger(32)) {
+      buffer.push_back(value);
+      return success();
+    }
+
+    if (attr.getType().isInteger(64)) {
+      buffer.push_back(value >> 32);
+      buffer.push_back(value);
+      return success();
+    }
+
+    emitError(op.getLoc(), "invalid type of an immediate index");
+    return failure();
+  }
+
+  // Emit an operand used as an index.
+  LogicalResult emitIndexRel(dxsa::IndexRel index) {
+    Operation *def = index.getOperand().getDefiningOp();
+    if (!def) {
+      emitError(index.getLoc(), "index must be defined");
+      return failure();
+    }
+
+    auto operand = dyn_cast<dxsa::Operand>(*def);
+    if (!operand) {
+      emitError(def->getLoc(), "invalid index relative operand");
+      return failure();
+    }
+
+    // Recursively emit an operand, which may also have other indices.
+    return emitOperand(operand);
+  }
+
+  // Emit an index as an operand + a 32 bit immediate offset.
+  LogicalResult emitIndexRelImm(dxsa::IndexRelImm index) {
+    Operation *def = index.getOperand().getDefiningOp();
+    if (!def) {
+      emitError(index.getLoc(), "index must be defined");
+      return failure();
+    }
+
+    auto operand = dyn_cast<dxsa::Operand>(*def);
+    if (!operand) {
+      emitError(def->getLoc(), "invalid index relative operand");
+      return failure();
+    }
+
+    // NOTE(review): the operand is emitted before the immediate offset.
+    // Confirm this order against BinaryParser and the documented layout
+    // of D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE.
+    if (failed(emitOperand(operand))) {
+      return failure();
+    }
+
+    buffer.push_back(index.getImm());
+    return success();
+  }
+
+private:
+  std::vector<uint32_t> buffer;
+  support::endian::Writer output;
+  OpcodeMap opcodeMap;
+};
+
 namespace mlir::dxsa {
 LogicalResult exportModuleToDxsaBinary(ModuleOp source, raw_ostream &output) {
-  Region &region = source.getRegion();
-  assert(region.hasOneBlock() && "invalid module");
-  return failure();
+  Writer writer(output);
+  return writer.emitModule(source);
 }
 } // namespace mlir::dxsa
diff --git a/mlir/lib/Target/DXSA/InstrInfo.def b/mlir/lib/Target/DXSA/InstrInfo.def
new file mode 100644
index 000000000000..35a36db3285e
--- /dev/null
+++ b/mlir/lib/Target/DXSA/InstrInfo.def
@@ -0,0 +1,312 @@
+SET(D3D10_SB_OPCODE_ADD, "add", 3, 0x06, D3D10_SB_FLOAT_OP);
+SET(D3D10_SB_OPCODE_AND, "and", 3, 0x06, D3D10_SB_BIT_OP);
+SET(D3D10_SB_OPCODE_BREAK, "break", 0, 0x00, D3D10_SB_FLOW_OP);
+SET(D3D10_SB_OPCODE_BREAKC, "breakc", 1, 0x00, D3D10_SB_FLOW_OP);
+SET(D3D10_SB_OPCODE_CALL, "call", 1, 0x00, D3D10_SB_FLOW_OP);
+SET(D3D10_SB_OPCODE_CALLC, "callc", 2, 0x00, D3D10_SB_FLOW_OP);
+SET(D3D10_SB_OPCODE_CONTINUE, "continue", 0, 0x00, D3D10_SB_FLOW_OP);
+SET(D3D10_SB_OPCODE_CONTINUEC, "continuec", 1, 0x00, D3D10_SB_FLOW_OP);
+SET(D3D10_SB_OPCODE_CASE, "case", 1, 0x00, D3D10_SB_FLOW_OP);
+SET(D3D10_SB_OPCODE_CUT, "cut", 0, 0x00, D3D10_SB_FLOW_OP);
+SET(D3D10_SB_OPCODE_DEFAULT, "default", 0, 0x00, D3D10_SB_FLOW_OP);
+SET(D3D10_SB_OPCODE_DISCARD, "discard", 1, 0x00, D3D10_SB_FLOW_OP);
+SET(D3D10_SB_OPCODE_DIV, "div", 3, 0x06, D3D10_SB_FLOAT_OP);
+SET(D3D10_SB_OPCODE_DP2, "dp2", 3, 0x06, D3D10_SB_FLOAT_OP);
+SET(D3D10_SB_OPCODE_DP3, "dp3", 3, 0x06, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_DP4, "dp4", 3, 0x06, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_ELSE, "else", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_EMIT, "emit", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_EMITTHENCUT, "emit_then_cut", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_ENDIF, "endif", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_ENDLOOP, "endloop", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_ENDSWITCH, "endswitch", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_EQ, "eq", 3, 0x00, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_EXP, "exp", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_FRC, "frc", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_FTOI, "ftoi", 2, 0x00, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_FTOU, "ftou", 2, 0x00, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_GE, "ge", 3, 0x00, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_DERIV_RTX, "deriv_rtx", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_DERIV_RTY, "deriv_rty", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_IADD, "iadd", 3, 0x06, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_IF, "if", 1, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_IEQ, "ieq", 3, 0x00, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_IGE, "ige", 3, 0x00, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_ILT, "ilt", 3, 0x00, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_IMAD, "imad", 4, 0x0e, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_IMAX, "imax", 3, 0x06, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_IMIN, "imin", 3, 0x06, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_IMUL, "imul", 4, 0x0c, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_INE, "ine", 3, 0x00, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_INEG, "ineg", 2, 0x02, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_ISHL, "ishl", 3, 0x02, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_ISHR, "ishr", 3, 0x02, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_ITOF, "itof", 2, 0x00, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_LABEL, "label", 1, 0x00, D3D10_SB_FLOW_OP); 
+SET(D3D10_SB_OPCODE_LD, "ld", 3, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_LD_MS, "ldms", 4, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_LOG, "log", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_LOOP, "loop", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_LT, "lt", 3, 0x00, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_MAD, "mad", 4, 0x0e, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_MAX, "max", 3, 0x06, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_MIN, "min", 3, 0x06, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_MOV, "mov", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_MOVC, "movc", 4, 0x0c, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_MUL, "mul", 3, 0x06, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_NE, "ne", 3, 0x00, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_NOP, "nop", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_NOT, "not", 2, 0x02, D3D10_SB_BIT_OP); +SET(D3D10_SB_OPCODE_OR, "or", 3, 0x06, D3D10_SB_BIT_OP); +SET(D3D10_SB_OPCODE_RESINFO, "resinfo", 3, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_RET, "ret", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_RETC, "retc", 1, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_ROUND_NE, "round_ne", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_ROUND_NI, "round_ni", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_ROUND_PI, "round_pi", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_ROUND_Z, "round_z", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_RSQ, "rsq", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_SAMPLE, "sample", 4, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_SAMPLE_B, "sample_b", 5, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_SAMPLE_L, "sample_l", 5, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_SAMPLE_D, "sample_d", 6, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_SAMPLE_C, "sample_c", 5, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_SAMPLE_C_LZ, "sample_c_lz", 5, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_SQRT, "sqrt", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_SWITCH, "switch", 1, 0x00, 
D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_SINCOS, "sincos", 3, 0x04, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_UDIV, "udiv", 4, 0x0c, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_ULT, "ult", 3, 0x00, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_UGE, "uge", 3, 0x00, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_UMAX, "umax", 3, 0x06, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_UMIN, "umin", 3, 0x06, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_UMUL, "umul", 4, 0x0c, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_UMAD, "umad", 4, 0x0e, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_USHR, "ushr", 3, 0x02, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_UTOF, "utof", 2, 0x00, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_XOR, "xor", 3, 0x06, D3D10_SB_BIT_OP); +SET(D3D10_SB_OPCODE_RESERVED0, "jmp", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_DCL_INPUT, "dcl_input", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_OUTPUT, "dcl_output", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_INPUT_SGV, "dcl_input_sgv", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_INPUT_PS_SGV, "dcl_input_ps_sgv", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE, "dcl_inputprimitive", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, "dcl_outputtopology", 0, + 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT, "dcl_maxout", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_INPUT_PS, "dcl_input_ps", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER, "dcl_constantbuffer", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_SAMPLER, "dcl_sampler", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_RESOURCE, "dcl_resource", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_INPUT_SIV, "dcl_input_siv", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_INPUT_PS_SIV, "dcl_input_ps_siv", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_OUTPUT_SIV, "dcl_output_siv", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_OUTPUT_SGV, 
"dcl_output_sgv", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_TEMPS, "dcl_temps", 0, 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP, "dcl_indexableTemp", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_INDEX_RANGE, "dcl_indexrange", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS, "dcl_globalFlags", 0, 0x00, + D3D10_SB_DCL_OP); + +SET(D3D10_1_SB_OPCODE_SAMPLE_INFO, "sampleinfo", 2, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_1_SB_OPCODE_SAMPLE_POS, "samplepos", 3, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_1_SB_OPCODE_GATHER4, "gather4", 4, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_1_SB_OPCODE_LOD, "lod", 4, 0x00, D3D10_SB_TEX_OP); + +SET(D3D11_SB_OPCODE_EMIT_STREAM, "emit_stream", 1, 0x00, D3D10_SB_FLOW_OP); +SET(D3D11_SB_OPCODE_CUT_STREAM, "cut_stream", 1, 0x00, D3D10_SB_FLOW_OP); +SET(D3D11_SB_OPCODE_EMITTHENCUT_STREAM, "emit_then_cut_stream", 1, 0x00, + D3D10_SB_FLOW_OP); +SET(D3D11_SB_OPCODE_INTERFACE_CALL, "fcall", 1, 0x00, D3D10_SB_FLOW_OP); + +SET(D3D11_SB_OPCODE_DCL_STREAM, "dcl_stream", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_FUNCTION_BODY, "dcl_function_body", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_FUNCTION_TABLE, "dcl_function_table", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_INTERFACE, "dcl_interface", 0, 0x00, D3D10_SB_DCL_OP); + +SET(D3D11_SB_OPCODE_BUFINFO, "bufinfo", 2, 0x00, D3D10_SB_TEX_OP); +SET(D3D11_SB_OPCODE_DERIV_RTX_COARSE, "deriv_rtx_coarse", 2, 0x02, + D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_DERIV_RTX_FINE, "deriv_rtx_fine", 2, 0x02, + D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_DERIV_RTY_COARSE, "deriv_rty_coarse", 2, 0x02, + D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_DERIV_RTY_FINE, "deriv_rty_fine", 2, 0x02, + D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_GATHER4_C, "gather4_c", 5, 0x00, D3D10_SB_TEX_OP); +SET(D3D11_SB_OPCODE_GATHER4_PO, "gather4_po", 5, 0x00, D3D10_SB_TEX_OP); +SET(D3D11_SB_OPCODE_GATHER4_PO_C, "gather4_po_c", 6, 0x00, D3D10_SB_TEX_OP); 
+SET(D3D11_SB_OPCODE_RCP, "rcp", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_F32TOF16, "f32tof16", 2, 0x00, D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_F16TOF32, "f16tof32", 2, 0x00, D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_UADDC, "uaddc", 4, 0x0c, D3D10_SB_UINT_OP); +SET(D3D11_SB_OPCODE_USUBB, "usubb", 4, 0x0c, D3D10_SB_UINT_OP); +SET(D3D11_SB_OPCODE_COUNTBITS, "countbits", 2, 0x02, D3D10_SB_BIT_OP); +SET(D3D11_SB_OPCODE_FIRSTBIT_HI, "firstbit_hi", 2, 0x02, D3D10_SB_BIT_OP); +SET(D3D11_SB_OPCODE_FIRSTBIT_LO, "firstbit_lo", 2, 0x02, D3D10_SB_BIT_OP); +SET(D3D11_SB_OPCODE_FIRSTBIT_SHI, "firstbit_shi", 2, 0x02, D3D10_SB_BIT_OP); +SET(D3D11_SB_OPCODE_UBFE, "ubfe", 4, 0x02, D3D10_SB_BIT_OP); +SET(D3D11_SB_OPCODE_IBFE, "ibfe", 4, 0x02, D3D10_SB_BIT_OP); +SET(D3D11_SB_OPCODE_BFI, "bfi", 5, 0x02, D3D10_SB_BIT_OP); +SET(D3D11_SB_OPCODE_BFREV, "bfrev", 2, 0x02, D3D10_SB_BIT_OP); +SET(D3D11_SB_OPCODE_SWAPC, "swapc", 5, 0x02, D3D10_SB_FLOAT_OP); + +SET(D3D11_SB_OPCODE_HS_DECLS, "hs_decls", 0, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_HS_CONTROL_POINT_PHASE, "hs_control_point_phase", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_HS_FORK_PHASE, "hs_fork_phase", 0, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_HS_JOIN_PHASE, "hs_join_phase", 0, 0x00, D3D10_SB_DCL_OP); + +SET(D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT, + "dcl_input_control_point_count", 0, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT, + "dcl_output_control_point_count", 0, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_TESS_DOMAIN, "dcl_tessellator_domain", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_TESS_PARTITIONING, "dcl_tessellator_partitioning", 0, + 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE, + "dcl_tessellator_output_primitive", 0, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR, "dcl_hs_max_tessfactor", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT, + 
"dcl_hs_fork_phase_instance_count", 0, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, + "dcl_hs_join_phase_instance_count", 0, 0x00, D3D10_SB_DCL_OP); + +SET(D3D11_SB_OPCODE_DCL_THREAD_GROUP, "dcl_thread_group", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED, "dcl_uav_typed", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW, "dcl_uav_raw", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED, + "dcl_uav_structured", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW, "dcl_tgsm_raw", 1, + 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED, + "dcl_tgsm_structured", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_RESOURCE_RAW, "dcl_resource_raw", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED, "dcl_resource_structured", 1, + 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_LD_UAV_TYPED, "ld_uav_typed", 3, 0x00, D3D11_SB_MEM_OP); +SET(D3D11_SB_OPCODE_STORE_UAV_TYPED, "store_uav_typed", 3, 0x00, + D3D11_SB_MEM_OP); +SET(D3D11_SB_OPCODE_LD_RAW, "ld_raw", 3, 0x00, D3D11_SB_MEM_OP); +SET(D3D11_SB_OPCODE_STORE_RAW, "store_raw", 3, 0x00, D3D11_SB_MEM_OP); +SET(D3D11_SB_OPCODE_LD_STRUCTURED, "ld_structured", 4, 0x00, D3D11_SB_MEM_OP); +SET(D3D11_SB_OPCODE_STORE_STRUCTURED, "store_structured", 4, 0x00, + D3D11_SB_MEM_OP); +SET(D3D11_SB_OPCODE_ATOMIC_AND, "atomic_and", 3, 0x00, D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_ATOMIC_OR, "atomic_or", 3, 0x00, D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_ATOMIC_XOR, "atomic_xor", 3, 0x00, D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_ATOMIC_CMP_STORE, "atomic_cmp_store", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_ATOMIC_IADD, "atomic_iadd", 3, 0x00, D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_ATOMIC_IMAX, "atomic_imax", 3, 0x00, D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_ATOMIC_IMIN, "atomic_imin", 3, 0x00, 
D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_ATOMIC_UMAX, "atomic_umax", 3, 0x00, D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_ATOMIC_UMIN, "atomic_umin", 3, 0x00, D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_ALLOC, "imm_atomic_alloc", 2, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_CONSUME, "imm_atomic_consume", 2, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_IADD, "imm_atomic_iadd", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_AND, "imm_atomic_and", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_OR, "imm_atomic_or", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_XOR, "imm_atomic_xor", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_EXCH, "imm_atomic_exch", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_CMP_EXCH, "imm_atomic_cmp_exch", 5, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_IMAX, "imm_atomic_imax", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_IMIN, "imm_atomic_imin", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_UMAX, "imm_atomic_umax", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_UMIN, "imm_atomic_umin", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_SYNC, "sync", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D11_SB_OPCODE_EVAL_SNAPPED, "eval_snapped", 3, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_EVAL_SAMPLE_INDEX, "eval_sample_index", 3, 0x02, + D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_EVAL_CENTROID, "eval_centroid", 2, 0x02, + D3D10_SB_FLOAT_OP); + +SET(D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT, "dcl_gsinstances", 0, 0x00, + D3D10_SB_DCL_OP); + +SET(D3D11_SB_OPCODE_DADD, "dadd", 3, 0x06, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DMAX, "dmax", 3, 0x06, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DMIN, "dmin", 3, 0x06, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DMUL, "dmul", 3, 0x06, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DEQ, "deq", 3, 0x00, D3D11_SB_DOUBLE_OP); 
+SET(D3D11_SB_OPCODE_DGE, "dge", 3, 0x00, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DLT, "dlt", 3, 0x00, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DNE, "dne", 3, 0x00, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DMOV, "dmov", 2, 0x02, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DMOVC, "dmovc", 4, 0x0c, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DTOF, "dtof", 2, 0x02, D3D11_SB_DOUBLE_TO_FLOAT_OP); +SET(D3D11_SB_OPCODE_FTOD, "ftod", 2, 0x00, D3D11_SB_FLOAT_TO_DOUBLE_OP); + +SET(D3D11_SB_OPCODE_ABORT, "abort", 0, 0x00, D3D11_SB_DEBUG_OP); +SET(D3D11_SB_OPCODE_DEBUG_BREAK, "debug_break", 0, 0x00, D3D11_SB_DEBUG_OP); + +SET(D3D11_1_SB_OPCODE_DDIV, "ddiv", 3, 0x06, D3D11_SB_DOUBLE_OP); +SET(D3D11_1_SB_OPCODE_DFMA, "dfma", 4, 0x0e, D3D11_SB_DOUBLE_OP); +SET(D3D11_1_SB_OPCODE_DRCP, "drcp", 2, 0x02, D3D11_SB_DOUBLE_OP); + +SET(D3D11_1_SB_OPCODE_MSAD, "msad", 4, 0x0e, D3D10_SB_UINT_OP); + +SET(D3D11_1_SB_OPCODE_DTOI, "dtoi", 2, 0x00, D3D11_SB_DOUBLE_OP); +SET(D3D11_1_SB_OPCODE_DTOU, "dtou", 2, 0x00, D3D11_SB_DOUBLE_OP); +SET(D3D11_1_SB_OPCODE_ITOD, "itod", 2, 0x00, D3D10_SB_INT_OP); +SET(D3D11_1_SB_OPCODE_UTOD, "utod", 2, 0x00, D3D10_SB_UINT_OP); + +SET(D3DWDDM1_3_SB_OPCODE_GATHER4_FEEDBACK, "gather4_s", 5, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_GATHER4_C_FEEDBACK, "gather4_c_s", 6, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_GATHER4_PO_FEEDBACK, "gather4_po_s", 6, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK, "gather4_po_c_s", 7, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_LD_FEEDBACK, "ld_s", 4, 0x00, D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_LD_MS_FEEDBACK, "ldms_s", 5, 0x00, D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_LD_UAV_TYPED_FEEDBACK, "ld_uav_typed_s", 4, 0x00, + D3D11_SB_MEM_OP); +SET(D3DWDDM1_3_SB_OPCODE_LD_RAW_FEEDBACK, "ld_raw_s", 4, 0x00, + D3D11_SB_MEM_OP); +SET(D3DWDDM1_3_SB_OPCODE_LD_STRUCTURED_FEEDBACK, "ld_structured_s", 5, 0x00, + D3D11_SB_MEM_OP); +SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_L_FEEDBACK, 
"sample_l_s", 6, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK, "sample_c_lz_s", 6, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_CLAMP_FEEDBACK, "sample_cl_s", 6, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_B_CLAMP_FEEDBACK, "sample_b_cl_s", 7, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_D_CLAMP_FEEDBACK, "sample_d_cl_s", 8, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_C_CLAMP_FEEDBACK, "sample_c_cl_s", 7, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_CHECK_ACCESS_FULLY_MAPPED, + "check_access_fully_mapped", 2, 0x00, D3D10_SB_TEX_OP); diff --git a/mlir/test/Target/DXSA/empty.mlir b/mlir/test/Target/DXSA/empty.mlir index 19d6cd55d36e..ed5a35ec9bfd 100644 --- a/mlir/test/Target/DXSA/empty.mlir +++ b/mlir/test/Target/DXSA/empty.mlir @@ -1,4 +1,8 @@ // RUN: mlir-translate --import-dxsa-bin | FileCheck %s +// RUN: mlir-translate --export-dxsa-bin %s -o - | mlir-translate --import-dxsa-bin - | FileCheck %s // CHECK: module { // CHECK-NEXT } + +module { +} diff --git a/mlir/test/Target/DXSA/mov-index.mlir b/mlir/test/Target/DXSA/mov-index.mlir index ff3ac88aa078..eb2323ebdb53 100644 --- a/mlir/test/Target/DXSA/mov-index.mlir +++ b/mlir/test/Target/DXSA/mov-index.mlir @@ -1,4 +1,8 @@ // RUN: mlir-translate --import-dxsa-bin %S/inputs/mov-index.bin | FileCheck %s +// RUN: mlir-translate --export-dxsa-bin %s -o %t.bin +// RUN: mlir-translate --import-dxsa-bin %t.bin | FileCheck %s +// RUN: diff %t.bin %S/inputs/mov-index.bin + // mov o0.xyzw, v[r0.x][0].xyzw // CHECK: module { @@ -11,3 +15,14 @@ // CHECK-NEXT: %6 = dxsa.operand %4, %5 {num_components = 4 : i32, swizzle = dense<[0, 1, 2, 3]> : vector<4xi32>, type = 1 : i32} // CHECK-NEXT: dxsa.instruction "mov" %1, %6 // CHECK-NEXT: } + +module { + %0 = dxsa.index.imm {imm = 0 : i32} + %1 = dxsa.operand %0 {mask = 240 : i32, num_components = 4 : i32, type = 2 : i32} + %2 = dxsa.index.imm {imm = 0 : i32} + %3 = dxsa.operand %2 
{num_components = 4 : i32, one = 0 : i32, type = 0 : i32} + %4 = dxsa.index.rel %3 + %5 = dxsa.index.imm {imm = 0 : i32} + %6 = dxsa.operand %4, %5 {num_components = 4 : i32, swizzle = dense<[0, 1, 2, 3]> : vector<4xi32>, type = 1 : i32} + dxsa.instruction "mov" %1, %6 +} diff --git a/mlir/test/Target/DXSA/mov.mlir b/mlir/test/Target/DXSA/mov.mlir index 4e4bd9989df0..de2854b3e0a4 100644 --- a/mlir/test/Target/DXSA/mov.mlir +++ b/mlir/test/Target/DXSA/mov.mlir @@ -1,4 +1,8 @@ // RUN: mlir-translate --import-dxsa-bin %S/inputs/mov.bin | FileCheck %s +// RUN: mlir-translate --export-dxsa-bin %s -o %t.bin +// RUN: mlir-translate --import-dxsa-bin %t.bin | FileCheck %s +// RUN: diff %t.bin %S/inputs/mov.bin + // mov r0.x, l(3.000000) // CHECK: module { diff --git a/mlir/test/Target/DXSA/ret.mlir b/mlir/test/Target/DXSA/ret.mlir index 24e0ec711ffc..b9b352bcab74 100644 --- a/mlir/test/Target/DXSA/ret.mlir +++ b/mlir/test/Target/DXSA/ret.mlir @@ -1,4 +1,7 @@ // RUN: mlir-translate --import-dxsa-bin %S/inputs/ret.bin | FileCheck %s +// RUN: mlir-translate --export-dxsa-bin %s -o %t.bin +// RUN: mlir-translate --import-dxsa-bin %t.bin | FileCheck %s +// RUN: diff %t.bin %S/inputs/ret.bin // CHECK: module { // CHECK-NEXT: dxsa.instruction "ret" diff --git a/mlir/test/Target/DXSA/udiv.mlir b/mlir/test/Target/DXSA/udiv.mlir index cbd535c7df8e..69f13aa71488 100644 --- a/mlir/test/Target/DXSA/udiv.mlir +++ b/mlir/test/Target/DXSA/udiv.mlir @@ -1,4 +1,8 @@ // RUN: mlir-translate --import-dxsa-bin %S/inputs/udiv.bin | FileCheck %s +// RUN: mlir-translate --export-dxsa-bin %s -o %t.bin +// RUN: mlir-translate --import-dxsa-bin %t.bin | FileCheck %s +// RUN: diff %t.bin %S/inputs/udiv.bin + // udiv NULL, r0.x, vOutputControlPointID, 4 // CHECK: module { @@ -9,3 +13,12 @@ // CHECK-NEXT: %4 = dxsa.operand.imm {imm = dense<4> : vector<1xi32>} // CHECK-NEXT: dxsa.instruction "udiv" %0, %2, %3, %4 // CHECK-NEXT: } + +module { + %0 = dxsa.operand {num_components = 0 : i32, type = 13 : i32} 
+ %1 = dxsa.index.imm {imm = 0 : i32} + %2 = dxsa.operand %1 {mask = 16 : i32, num_components = 4 : i32, type = 0 : i32} + %3 = dxsa.operand {num_components = 1 : i32, type = 22 : i32} + %4 = dxsa.operand.imm {imm = dense<4> : vector<1xi32>} + dxsa.instruction "udiv" %0, %2, %3, %4 +} From ef42ce826d68323dc7db04fd2cd2fe23fa68f563 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev Date: Wed, 29 Apr 2026 19:04:36 +0900 Subject: [PATCH 02/10] Return emitError --- mlir/lib/Target/DXSA/BinaryWriter.cpp | 64 +++++++++------------------ 1 file changed, 22 insertions(+), 42 deletions(-) diff --git a/mlir/lib/Target/DXSA/BinaryWriter.cpp b/mlir/lib/Target/DXSA/BinaryWriter.cpp index 11759cac7ed0..89a710151c90 100644 --- a/mlir/lib/Target/DXSA/BinaryWriter.cpp +++ b/mlir/lib/Target/DXSA/BinaryWriter.cpp @@ -30,8 +30,7 @@ static FailureOr getIndexRepresentation(Operation *op) { if (auto imm = dyn_cast(op)) { auto attr = dyn_cast(imm.getImm()); if (!attr) { - emitError(op->getLoc(), "invalid immediate index"); - return failure(); + return emitError(op->getLoc(), "invalid immediate index"); } if (attr.getType().isInteger(32)) { @@ -42,8 +41,7 @@ static FailureOr getIndexRepresentation(Operation *op) { return D3D10_SB_OPERAND_INDEX_IMMEDIATE64; } - emitError(op->getLoc(), "invalid immediate index type"); - return failure(); + return emitError(op->getLoc(), "invalid immediate index type"); } if (isa(op)) { @@ -54,8 +52,7 @@ static FailureOr getIndexRepresentation(Operation *op) { return D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; } - emitError(op->getLoc(), "invalid index type"); - return failure(); + return emitError(op->getLoc(), "invalid index type"); } class Writer { @@ -67,8 +64,7 @@ class Writer { LogicalResult emitModule(ModuleOp source) { Region ®ion = source.getRegion(); if (!region.hasOneBlock()) { - emitError(region.getLoc(), "region should contain only one block"); - return failure(); + return emitError(region.getLoc(), "region should contain only one block"); } 
for (auto &op : region.front()) { @@ -90,8 +86,7 @@ class Writer { auto opcodeIt = opcodeMap.find(inst.getMnemonic()); if (opcodeIt == opcodeMap.end()) { - emitError(inst.getLoc(), "unknown mnemonic"); - return failure(); + return emitError(inst.getLoc(), "unknown mnemonic"); } // First token is an opcode and length. Length is unknown until we @@ -103,8 +98,7 @@ class Writer { for (Value value : inst.getOperands()) { Operation *op = value.getDefiningOp(); if (!op) { - emitError(value.getLoc(), "undefined operand"); - return failure(); + return emitError(value.getLoc(), "undefined operand"); } if (auto operand = dyn_cast(*op)) { @@ -121,8 +115,7 @@ class Writer { continue; } - emitError(op->getLoc(), "unexpected operand kind"); - return failure(); + return emitError(op->getLoc(), "unexpected operand kind"); } // Fixup instruction length after all operands are accumulated in @@ -164,8 +157,7 @@ class Writer { values.push_back(v.getZExtValue()); } if (values.size() != 4) { - emitError(op.getLoc(), "invalid number of swizzle values"); - return failure(); + return emitError(op.getLoc(), "invalid number of swizzle values"); } token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE( D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE); @@ -181,8 +173,7 @@ class Writer { break; } default: { - emitError(op.getLoc(), "invalid number of components"); - return failure(); + return emitError(op.getLoc(), "invalid number of components"); } } @@ -193,8 +184,7 @@ class Writer { for (Value value : op.getOperands()) { Operation *index = value.getDefiningOp(); if (!index) { - emitError(value.getLoc(), "index must be defined"); - return failure(); + return emitError(value.getLoc(), "index must be defined"); } FailureOr repr = getIndexRepresentation(index); @@ -211,8 +201,7 @@ class Writer { for (Value value : op.getOperands()) { Operation *index = value.getDefiningOp(); if (!index) { - emitError(value.getLoc(), "index must be defined"); - return failure(); + return emitError(value.getLoc(), 
"index must be defined"); } if (auto indexImm = dyn_cast(*index)) { @@ -236,8 +225,7 @@ class Writer { continue; } - emitError(value.getLoc(), "invalid index type"); - return failure(); + return emitError(value.getLoc(), "invalid index type"); } return success(); @@ -249,7 +237,7 @@ class Writer { LogicalResult emitOperandImm(dxsa::OperandImm op) { auto attr = dyn_cast(op.getImm()); if (!attr) { - emitError(op.getLoc(), "invalid immediate operand"); + return emitError(op.getLoc(), "invalid immediate operand"); } uint32_t token = 0; @@ -260,8 +248,7 @@ class Writer { } else if (elementType.isInteger(64)) { token |= ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_IMMEDIATE64); } else { - emitError(op.getLoc(), "invalid immediate operand type"); - return failure(); + return emitError(op.getLoc(), "invalid immediate operand type"); } // Split immediates into tokens. 32 bit immediate values are @@ -283,9 +270,8 @@ class Writer { token |= ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_4_COMPONENT); } else { - emitError(op.getLoc(), - "immediate operand should be either 1- or 4- component"); - return failure(); + return emitError(op.getLoc(), + "immediate operand should be either 1- or 4- component"); } buffer.push_back(token); @@ -301,8 +287,7 @@ class Writer { LogicalResult emitIndexImm(dxsa::IndexImm op) { auto attr = dyn_cast(op.getImm()); if (!attr) { - emitError(op.getLoc(), "invalid immediate index"); - return failure(); + return emitError(op.getLoc(), "invalid immediate index"); } uint64_t value = attr.getInt(); @@ -317,22 +302,19 @@ class Writer { return success(); } - emitError(op.getLoc(), "invalid type of an immediate index"); - return failure(); + return emitError(op.getLoc(), "invalid type of an immediate index"); } // Emit an operand used as an index. 
LogicalResult emitIndexRel(dxsa::IndexRel index) { Operation *def = index.getOperand().getDefiningOp(); if (!def) { - emitError(index.getLoc(), "index must be defined"); - return failure(); + return emitError(index.getLoc(), "index must be defined"); } auto operand = dyn_cast(*def); if (!operand) { - emitError(def->getLoc(), "invalid index relative operand"); - return failure(); + return emitError(def->getLoc(), "invalid index relative operand"); } // Recursively emit an operand, which may also have other indices. @@ -343,14 +325,12 @@ class Writer { LogicalResult emitIndexRelImm(dxsa::IndexRelImm index) { Operation *def = index.getOperand().getDefiningOp(); if (!def) { - emitError(index.getLoc(), "index must be defined"); - return failure(); + return emitError(index.getLoc(), "index must be defined"); } auto operand = dyn_cast(*def); if (!operand) { - emitError(def->getLoc(), "invalid index relative operand"); - return failure(); + return emitError(def->getLoc(), "invalid index relative operand"); } if (failed(emitOperand(operand))) { From 82468a74d060d8a9fac05a1d8761f29bd4d6b7f8 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev Date: Wed, 29 Apr 2026 19:38:08 +0900 Subject: [PATCH 03/10] Use TypeSwitch --- mlir/lib/Target/DXSA/BinaryWriter.cpp | 59 +++++++++++---------------- 1 file changed, 24 insertions(+), 35 deletions(-) diff --git a/mlir/lib/Target/DXSA/BinaryWriter.cpp b/mlir/lib/Target/DXSA/BinaryWriter.cpp index 89a710151c90..c7b5f6e50ac9 100644 --- a/mlir/lib/Target/DXSA/BinaryWriter.cpp +++ b/mlir/lib/Target/DXSA/BinaryWriter.cpp @@ -5,6 +5,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugLog.h" #include "llvm/Support/EndianStream.h" @@ -101,21 +102,17 @@ class Writer { return emitError(value.getLoc(), "undefined operand"); } - if (auto operand = dyn_cast(*op)) { - if (failed(emitOperand(operand))) { - return 
failure(); - } - continue; + auto result = + llvm::TypeSwitch(*op) + .Case([this](auto op) { return emitOperand(op); }) + .Case( + [this](auto op) { return emitOperandImm(op); }) + .Default([this](auto &op) { + return emitError(op.getLoc(), "unexpected operand kind"); + }); + if (failed(result)) { + return result; } - - if (auto operand = dyn_cast(*op)) { - if (failed(emitOperandImm(operand))) { - return failure(); - } - continue; - } - - return emitError(op->getLoc(), "unexpected operand kind"); } // Fixup instruction length after all operands are accumulated in @@ -204,28 +201,20 @@ class Writer { return emitError(value.getLoc(), "index must be defined"); } - if (auto indexImm = dyn_cast(*index)) { - if (failed(emitIndexImm(indexImm))) { - return failure(); - } - continue; - } - - if (auto indexRel = dyn_cast(*index)) { - if (failed(emitIndexRel(indexRel))) { - return failure(); - } - continue; + auto result = llvm::TypeSwitch(*index) + .Case( + [this](auto op) { return emitIndexImm(op); }) + .Case( + [this](auto op) { return emitIndexRel(op); }) + .Case( + [this](auto op) { return emitIndexRelImm(op); }) + .Default([this](auto &op) { + return emitError(op.getLoc(), "invalid index type"); + }); + + if (failed(result)) { + return result; } - - if (auto indexRelImm = dyn_cast(*index)) { - if (failed(emitIndexRelImm(indexRelImm))) { - return failure(); - } - continue; - } - - return emitError(value.getLoc(), "invalid index type"); } return success(); From e455a8e8bf54fa6605c7c7992e280a2d182d03e4 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev Date: Wed, 29 Apr 2026 19:53:27 +0900 Subject: [PATCH 04/10] Fix for loops --- mlir/lib/Target/DXSA/BinaryWriter.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Target/DXSA/BinaryWriter.cpp b/mlir/lib/Target/DXSA/BinaryWriter.cpp index c7b5f6e50ac9..c98d6206ccb7 100644 --- a/mlir/lib/Target/DXSA/BinaryWriter.cpp +++ b/mlir/lib/Target/DXSA/BinaryWriter.cpp @@ -150,7 +150,7 @@ class Writer 
{ token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_MASK(*mask); } else if (auto swizzle = op.getSwizzle()) { SmallVector values; - for (APInt v : *swizzle) { + for (const APInt &v : *swizzle) { values.push_back(v.getZExtValue()); } if (values.size() != 4) { @@ -244,7 +244,7 @@ class Writer { // encoded as is, and 64 bit immediates are split into high and // low 32 bit parts. SmallVector values; - for (APInt v : attr) { + for (const APInt &v : attr) { uint64_t bits = v.getZExtValue(); if (v.getBitWidth() == 64) { values.push_back(bits >> 32); @@ -264,9 +264,7 @@ class Writer { } buffer.push_back(token); - for (uint32_t v : values) { - buffer.push_back(v); - } + llvm::append_range(buffer, values); return success(); } From c4948cd1afc0a74234d193fec8048e9fb84d3e9d Mon Sep 17 00:00:00 2001 From: Andrew Savonichev Date: Wed, 29 Apr 2026 20:46:12 +0900 Subject: [PATCH 05/10] Comment why we only handle Instructions in emitModule --- mlir/lib/Target/DXSA/BinaryWriter.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mlir/lib/Target/DXSA/BinaryWriter.cpp b/mlir/lib/Target/DXSA/BinaryWriter.cpp index c98d6206ccb7..3162f5a59544 100644 --- a/mlir/lib/Target/DXSA/BinaryWriter.cpp +++ b/mlir/lib/Target/DXSA/BinaryWriter.cpp @@ -69,6 +69,9 @@ class Writer { } for (auto &op : region.front()) { + // Only handle instructions. Skip operands and indices - they + // are emitted by emitInstruction for instructions that use + // them. 
if (auto inst = dyn_cast(op)) { if (failed(emitInstruction(inst))) { return failure(); From 665704149cb2fb205c6a27a1546a7183d3ff0eb9 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev Date: Fri, 8 May 2026 21:55:43 +0900 Subject: [PATCH 06/10] Replace checks for invariants with casts and assertions, add more TypeSwitch --- mlir/lib/Target/DXSA/BinaryWriter.cpp | 97 +++++++++------------------ 1 file changed, 31 insertions(+), 66 deletions(-) diff --git a/mlir/lib/Target/DXSA/BinaryWriter.cpp b/mlir/lib/Target/DXSA/BinaryWriter.cpp index 3162f5a59544..35990e34d354 100644 --- a/mlir/lib/Target/DXSA/BinaryWriter.cpp +++ b/mlir/lib/Target/DXSA/BinaryWriter.cpp @@ -28,32 +28,24 @@ static void initOpcodeMap(OpcodeMap &opcodes) { } static FailureOr getIndexRepresentation(Operation *op) { - if (auto imm = dyn_cast(op)) { - auto attr = dyn_cast(imm.getImm()); - if (!attr) { - return emitError(op->getLoc(), "invalid immediate index"); - } - - if (attr.getType().isInteger(32)) { - return D3D10_SB_OPERAND_INDEX_IMMEDIATE32; - } - - if (attr.getType().isInteger(64)) { - return D3D10_SB_OPERAND_INDEX_IMMEDIATE64; - } - - return emitError(op->getLoc(), "invalid immediate index type"); - } - - if (isa(op)) { - return D3D10_SB_OPERAND_INDEX_RELATIVE; - } - - if (isa(op)) { - return D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; - } - - return emitError(op->getLoc(), "invalid index type"); + return llvm::TypeSwitch>(*op) + .Case([](auto imm) { + auto attr = cast(imm.getImm()); + auto type = cast(attr.getType()); + if (type.getWidth() == 32) { + return D3D10_SB_OPERAND_INDEX_IMMEDIATE32; + } + assert(type.getWidth() == 64 && "invalid index type"); + return D3D10_SB_OPERAND_INDEX_IMMEDIATE64; + }) + .Case( + [](auto imm) { return D3D10_SB_OPERAND_INDEX_RELATIVE; }) + .Case([](auto imm) { + return D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; + }) + .Default([](auto &op) { + return emitError(op.getLoc(), "invalid index type"); + }); } class Writer { @@ -183,9 +175,7 @@ class 
Writer { uint32_t dim = 0; for (Value value : op.getOperands()) { Operation *index = value.getDefiningOp(); - if (!index) { - return emitError(value.getLoc(), "index must be defined"); - } + assert(index && "undefined index"); FailureOr repr = getIndexRepresentation(index); if (failed(repr)) { @@ -200,17 +190,15 @@ class Writer { // Indices follow the operand token. for (Value value : op.getOperands()) { Operation *index = value.getDefiningOp(); - if (!index) { - return emitError(value.getLoc(), "index must be defined"); - } + assert(index && "undefined index"); auto result = llvm::TypeSwitch(*index) .Case( - [this](auto op) { return emitIndexImm(op); }) + [this](auto &op) { return emitIndexImm(op); }) .Case( - [this](auto op) { return emitIndexRel(op); }) + [this](auto &op) { return emitIndexRel(op); }) .Case( - [this](auto op) { return emitIndexRelImm(op); }) + [this](auto &op) { return emitIndexRelImm(op); }) .Default([this](auto &op) { return emitError(op.getLoc(), "invalid index type"); }); @@ -227,20 +215,16 @@ class Writer { // operands do not have indices. They are encoded as an operand // followed by N immediate values for each component. 
LogicalResult emitOperandImm(dxsa::OperandImm op) { - auto attr = dyn_cast(op.getImm()); - if (!attr) { - return emitError(op.getLoc(), "invalid immediate operand"); - } + auto attr = cast(op.getImm()); uint32_t token = 0; - Type elementType = attr.getType().getElementType(); - if (elementType.isInteger(32)) { + auto elementType = cast(attr.getType().getElementType()); + if (elementType.getWidth() == 32) { token |= ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_IMMEDIATE32); - } else if (elementType.isInteger(64)) { - token |= ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_IMMEDIATE64); } else { - return emitError(op.getLoc(), "invalid immediate operand type"); + assert(elementType.getWidth() == 64 && "invalid immediate"); + token |= ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_IMMEDIATE64); } // Split immediates into tokens. 32 bit immediate values are @@ -275,10 +259,7 @@ class Writer { // Emit an immediate index. Its type is encoded into the operand, so // here we only emit the value as tokens. LogicalResult emitIndexImm(dxsa::IndexImm op) { - auto attr = dyn_cast(op.getImm()); - if (!attr) { - return emitError(op.getLoc(), "invalid immediate index"); - } + auto attr = cast(op.getImm()); uint64_t value = attr.getInt(); if (attr.getType().isInteger(32)) { @@ -297,15 +278,7 @@ class Writer { // Emit an operand used as an index. LogicalResult emitIndexRel(dxsa::IndexRel index) { - Operation *def = index.getOperand().getDefiningOp(); - if (!def) { - return emitError(index.getLoc(), "index must be defined"); - } - - auto operand = dyn_cast(*def); - if (!operand) { - return emitError(def->getLoc(), "invalid index relative operand"); - } + auto operand = cast(index.getOperand().getDefiningOp()); // Recursively emit an operand, which may also have other indices. return emitOperand(operand); @@ -313,15 +286,7 @@ class Writer { // Emit an index as an operand + a 32 bit immediate offset. 
LogicalResult emitIndexRelImm(dxsa::IndexRelImm index) { - Operation *def = index.getOperand().getDefiningOp(); - if (!def) { - return emitError(index.getLoc(), "index must be defined"); - } - - auto operand = dyn_cast(*def); - if (!operand) { - return emitError(def->getLoc(), "invalid index relative operand"); - } + auto operand = cast(index.getOperand().getDefiningOp()); if (failed(emitOperand(operand))) { return failure(); From 461ed9c62252e1338368f6b0a3cd9d50cab273b6 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev Date: Wed, 13 May 2026 23:48:53 +0900 Subject: [PATCH 07/10] Move "invalid number of swizzle values" error to the verifier --- mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td | 1 + mlir/lib/Dialect/DXSA/IR/DXSA.cpp | 12 ++++++++++++ mlir/lib/Target/DXSA/BinaryWriter.cpp | 8 ++------ mlir/test/Target/DXSA/operand_swizzle_invalid.mlir | 4 ++++ 4 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 mlir/test/Target/DXSA/operand_swizzle_invalid.mlir diff --git a/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td b/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td index b10ed675bf43..365c92bf13ab 100644 --- a/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td +++ b/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td @@ -87,6 +87,7 @@ def DXSA_Operand : DXSA_Op<"operand"> { OptionalAttr:$non_uniform); let results = (outs DXSA_OperandType:$operand); let assemblyFormat = "$operands attr-dict"; + let hasVerifier = 1; } def DXSA_OperandImm : DXSA_Op<"operand.imm"> { diff --git a/mlir/lib/Dialect/DXSA/IR/DXSA.cpp b/mlir/lib/Dialect/DXSA/IR/DXSA.cpp index 0281c642269a..54c0099b5ea3 100644 --- a/mlir/lib/Dialect/DXSA/IR/DXSA.cpp +++ b/mlir/lib/Dialect/DXSA/IR/DXSA.cpp @@ -34,6 +34,18 @@ void DXSADialect::initialize() { >(); } +//===----------------------------------------------------------------------===// +// Operand +//===----------------------------------------------------------------------===// + +LogicalResult Operand::verify() { + if (auto swizzle = getSwizzle()) { + if 
(swizzle->getNumElements() != 4) + return emitOpError("invalid number of swizzle values"); + } + return success(); +} + //===----------------------------------------------------------------------===// // DclGlobalFlags //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/DXSA/BinaryWriter.cpp b/mlir/lib/Target/DXSA/BinaryWriter.cpp index 35990e34d354..69caf0b1ac04 100644 --- a/mlir/lib/Target/DXSA/BinaryWriter.cpp +++ b/mlir/lib/Target/DXSA/BinaryWriter.cpp @@ -93,9 +93,7 @@ class Writer { for (Value value : inst.getOperands()) { Operation *op = value.getDefiningOp(); - if (!op) { - return emitError(value.getLoc(), "undefined operand"); - } + assert(op && "undefined operand"); auto result = llvm::TypeSwitch(*op) @@ -148,9 +146,7 @@ class Writer { for (const APInt &v : *swizzle) { values.push_back(v.getZExtValue()); } - if (values.size() != 4) { - return emitError(op.getLoc(), "invalid number of swizzle values"); - } + assert(values.size() == 4 && "invalid number of swizzle values"); token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE( D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE); token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE( diff --git a/mlir/test/Target/DXSA/operand_swizzle_invalid.mlir b/mlir/test/Target/DXSA/operand_swizzle_invalid.mlir new file mode 100644 index 000000000000..08eec568f36a --- /dev/null +++ b/mlir/test/Target/DXSA/operand_swizzle_invalid.mlir @@ -0,0 +1,4 @@ +// RUN: mlir-opt %s -verify-diagnostics + +// expected-error@+1 {{invalid number of swizzle values}} +%1 = dxsa.operand {num_components = 4 : i32, swizzle = dense<[0, 1, 2, 3, 4]> : vector<5xi32>, type = 1 : i32} From 778421da459e276683e366b4e7aa4f46cce4ecb4 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev Date: Wed, 13 May 2026 23:49:11 +0900 Subject: [PATCH 08/10] Use ModuleOp::getBody --- mlir/lib/Target/DXSA/BinaryWriter.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git 
a/mlir/lib/Target/DXSA/BinaryWriter.cpp b/mlir/lib/Target/DXSA/BinaryWriter.cpp index 69caf0b1ac04..0d43691983c2 100644 --- a/mlir/lib/Target/DXSA/BinaryWriter.cpp +++ b/mlir/lib/Target/DXSA/BinaryWriter.cpp @@ -55,12 +55,7 @@ class Writer { } LogicalResult emitModule(ModuleOp source) { - Region &region = source.getRegion(); - if (!region.hasOneBlock()) { - return emitError(region.getLoc(), "region should contain only one block"); - } - - for (auto &op : region.front()) { + for (auto &op : *source.getBody()) { // Only handle instructions. Skip operands and indices - they // are emitted by emitInstruction for instructions that use // them. From 8ef2d159b81c750a902c4060bfd9e36c3c373d65 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev Date: Thu, 14 May 2026 00:23:13 +0900 Subject: [PATCH 09/10] Move "invalid number of components" error to the verifier --- mlir/lib/Dialect/DXSA/IR/DXSA.cpp | 5 ++++ mlir/lib/Target/DXSA/BinaryWriter.cpp | 3 --- .../DXSA/operand_components_invalid.mlir | 23 +++++++++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 mlir/test/Target/DXSA/operand_components_invalid.mlir diff --git a/mlir/lib/Dialect/DXSA/IR/DXSA.cpp b/mlir/lib/Dialect/DXSA/IR/DXSA.cpp index 54c0099b5ea3..4952be180155 100644 --- a/mlir/lib/Dialect/DXSA/IR/DXSA.cpp +++ b/mlir/lib/Dialect/DXSA/IR/DXSA.cpp @@ -43,6 +43,11 @@ LogicalResult Operand::verify() { if (swizzle->getNumElements() != 4) return emitOpError("invalid number of swizzle values"); } + + uint32_t numComponents = getNumComponents(); + if (numComponents != 0 && numComponents != 1 && numComponents != 4) + return emitOpError("invalid number of components"); + return success(); } diff --git a/mlir/lib/Target/DXSA/BinaryWriter.cpp b/mlir/lib/Target/DXSA/BinaryWriter.cpp index 0d43691983c2..bad73387a0bf 100644 --- a/mlir/lib/Target/DXSA/BinaryWriter.cpp +++ b/mlir/lib/Target/DXSA/BinaryWriter.cpp @@ -155,9 +155,6 @@ class Writer { } break; } - default: { - return emitError(op.getLoc(),
"invalid number of components"); - } } // Operand token encodes types and number of indices that follow diff --git a/mlir/test/Target/DXSA/operand_components_invalid.mlir b/mlir/test/Target/DXSA/operand_components_invalid.mlir new file mode 100644 index 000000000000..99267e2aa913 --- /dev/null +++ b/mlir/test/Target/DXSA/operand_components_invalid.mlir @@ -0,0 +1,23 @@ +// RUN: mlir-opt %s -split-input-file -verify-diagnostics + +// OK +%0 = dxsa.operand {num_components = 0 : i32, type = 1 : i32} + +// OK +%1 = dxsa.operand {num_components = 1 : i32, type = 1 : i32} + +// expected-error@+1 {{invalid number of components}} +%2 = dxsa.operand {num_components = 2 : i32, type = 1 : i32} + +// ----- + +// expected-error@+1 {{invalid number of components}} +%3 = dxsa.operand {num_components = 3 : i32, type = 1 : i32} + +// OK +%4 = dxsa.operand {num_components = 4 : i32, type = 1 : i32} + +// ----- + +// expected-error@+1 {{invalid number of components}} +%5 = dxsa.operand {num_components = 5 : i32, type = 1 : i32} From f7b2fcaf2417c06488fac0f50e63109f7950bad9 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev Date: Thu, 14 May 2026 00:33:44 +0900 Subject: [PATCH 10/10] Move "immediate operand should be either 1- or 4- component" error to the verifier --- mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td | 1 + mlir/lib/Dialect/DXSA/IR/DXSA.cpp | 14 +++++++++++++ mlir/lib/Target/DXSA/BinaryWriter.cpp | 18 ++++++++--------- .../DXSA/operand_imm_components_invalid.mlir | 20 +++++++++++++++++++ 4 files changed, 43 insertions(+), 10 deletions(-) create mode 100644 mlir/test/Target/DXSA/operand_imm_components_invalid.mlir diff --git a/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td b/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td index 365c92bf13ab..f8c5916965ca 100644 --- a/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td +++ b/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td @@ -99,6 +99,7 @@ def DXSA_OperandImm : DXSA_Op<"operand.imm"> { let arguments = (ins AnyAttrOf<[I32ElementsAttr, 
I64ElementsAttr]>:$imm); let results = (outs DXSA_OperandType:$operand); let assemblyFormat = "attr-dict"; + let hasVerifier = 1; } def DXSA_IndexImm : DXSA_Op<"index.imm"> { diff --git a/mlir/lib/Dialect/DXSA/IR/DXSA.cpp b/mlir/lib/Dialect/DXSA/IR/DXSA.cpp index 4952be180155..7bb425ce4bde 100644 --- a/mlir/lib/Dialect/DXSA/IR/DXSA.cpp +++ b/mlir/lib/Dialect/DXSA/IR/DXSA.cpp @@ -51,6 +51,20 @@ LogicalResult Operand::verify() { return success(); } +//===----------------------------------------------------------------------===// +// OperandImm +//===----------------------------------------------------------------------===// + +LogicalResult OperandImm::verify() { + auto attr = cast(getImm()); + uint32_t numComponents = attr.getNumElements(); + + if (numComponents != 1 && numComponents != 4) + return emitOpError("immediate operand should be either 1- or 4- component"); + + return success(); +} + //===----------------------------------------------------------------------===// // DclGlobalFlags //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/DXSA/BinaryWriter.cpp b/mlir/lib/Target/DXSA/BinaryWriter.cpp index bad73387a0bf..728f7865594f 100644 --- a/mlir/lib/Target/DXSA/BinaryWriter.cpp +++ b/mlir/lib/Target/DXSA/BinaryWriter.cpp @@ -230,12 +230,11 @@ class Writer { if (values.size() == 1) { token |= ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_1_COMPONENT); - } else if (values.size() == 4) { + } else { + assert(values.size() == 4 && + "immediate operand should be either 1- or 4- component"); token |= ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_4_COMPONENT); - } else { - return emitError(op.getLoc(), - "immediate operand should be either 1- or 4- component"); } buffer.push_back(token); @@ -255,13 +254,12 @@ class Writer { return success(); } - if (attr.getType().isInteger(64)) { - buffer.push_back(value >> 32); - buffer.push_back(value); - return success(); - } + 
assert(attr.getType().isInteger(64) && + "invalid type of an immediate index"); - return emitError(op.getLoc(), "invalid type of an immediate index"); + buffer.push_back(value >> 32); + buffer.push_back(value); + return success(); } // Emit an operand used as an index. diff --git a/mlir/test/Target/DXSA/operand_imm_components_invalid.mlir b/mlir/test/Target/DXSA/operand_imm_components_invalid.mlir new file mode 100644 index 000000000000..539cff67a62f --- /dev/null +++ b/mlir/test/Target/DXSA/operand_imm_components_invalid.mlir @@ -0,0 +1,20 @@ +// RUN: mlir-opt %s -split-input-file -verify-diagnostics + +// OK +%1 = dxsa.operand.imm {imm = dense<1> : vector<1xi32>} + +// expected-error@+1 {{immediate operand should be either 1- or 4- component}} +%2 = dxsa.operand.imm {imm = dense<[1, 2]> : vector<2xi32>} + +// ----- + +// expected-error@+1 {{immediate operand should be either 1- or 4- component}} +%3 = dxsa.operand.imm {imm = dense<[1, 2, 3]> : vector<3xi32>} + +// ----- + +// OK +%4 = dxsa.operand.imm {imm = dense<[1, 2, 3, 4]> : vector<4xi32>} + +// expected-error@+1 {{immediate operand should be either 1- or 4- component}} +%5 = dxsa.operand.imm {imm = dense<[1, 2, 3, 4, 5]> : vector<5xi32>}