diff --git a/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td b/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td
index 5039217ee07b..f8c5916965ca 100644
--- a/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td
+++ b/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td
@@ -87,6 +87,7 @@ def DXSA_Operand : DXSA_Op<"operand"> {
                        OptionalAttr:$non_uniform);
   let results = (outs DXSA_OperandType:$operand);
   let assemblyFormat = "$operands attr-dict";
+  let hasVerifier = 1;
 }
 
 def DXSA_OperandImm : DXSA_Op<"operand.imm"> {
@@ -98,6 +99,7 @@ def DXSA_OperandImm : DXSA_Op<"operand.imm"> {
   let arguments = (ins AnyAttrOf<[I32ElementsAttr, I64ElementsAttr]>:$imm);
   let results = (outs DXSA_OperandType:$operand);
   let assemblyFormat = "attr-dict";
+  let hasVerifier = 1;
 }
 
 def DXSA_IndexImm : DXSA_Op<"index.imm"> {
@@ -128,7 +130,7 @@ def DXSA_IndexRelImm : DXSA_Op<"index.rel.imm"> {
     TODO
   }];
 
-  let arguments = (ins DXSA_OperandType:$operand, StrAttr:$op, I64Attr:$imm);
+  let arguments = (ins DXSA_OperandType:$operand, StrAttr:$op, I32Attr:$imm);
   let results = (outs DXSA_IndexType:$index);
   let assemblyFormat = "$operand attr-dict";
 }
diff --git a/mlir/lib/Dialect/DXSA/IR/DXSA.cpp b/mlir/lib/Dialect/DXSA/IR/DXSA.cpp
index 0281c642269a..7bb425ce4bde 100644
--- a/mlir/lib/Dialect/DXSA/IR/DXSA.cpp
+++ b/mlir/lib/Dialect/DXSA/IR/DXSA.cpp
@@ -34,6 +34,50 @@ void DXSADialect::initialize() {
       >();
 }
 
+//===----------------------------------------------------------------------===//
+// Operand
+//===----------------------------------------------------------------------===//
+
+/// Verify the optional swizzle and the component count of an operand.
+///
+/// Fails when the swizzle does not have exactly four selectors, when a
+/// selector is greater than 3, or when the number of components is not 0, 1
+/// or 4.
+LogicalResult Operand::verify() {
+  if (auto swizzle = getSwizzle()) {
+    if (swizzle->getNumElements() != 4)
+      return emitOpError("invalid number of swizzle values");
+
+    // Each selector picks one of the four source components, so only 0-3
+    // can be represented once the operand is written out in binary form.
+    for (const llvm::APInt &selector : *swizzle) {
+      if (selector.uge(4))
+        return emitOpError("swizzle value is out of range");
+    }
+  }
+
+  uint32_t numComponents = getNumComponents();
+  if (numComponents != 0 && numComponents != 1 && numComponents != 4)
+    return emitOpError("invalid number of components");
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// OperandImm
+//===----------------------------------------------------------------------===//
+
+/// Verify that an immediate operand carries either one or four values.
+LogicalResult OperandImm::verify() {
+  auto attr = cast<DenseIntElementsAttr>(getImm());
+  int64_t numComponents = attr.getNumElements();
+
+  if (numComponents != 1 && numComponents != 4)
+    return emitOpError("immediate operand should be either 1- or 4- component");
+
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // DclGlobalFlags
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/DXSA/BinaryParser.cpp b/mlir/lib/Target/DXSA/BinaryParser.cpp
index 2ac6629e6d75..ed6480fdb30b 100644
--- a/mlir/lib/Target/DXSA/BinaryParser.cpp
+++ b/mlir/lib/Target/DXSA/BinaryParser.cpp
@@ -52,320 +52,8 @@ struct InstructionInfo {
 static void initInstructionInfo(MutableArrayRef<InstructionInfo> instructions) {
 #define SET(OpCode, Name, NumOperands, PrecMask, OpClass)                      \
   instructions[OpCode] = InstructionInfo{NumOperands, Name, OpClass, PrecMask};
-  // clang-format off
-  SET(D3D10_SB_OPCODE_ADD, "add", 3, 0x06, D3D10_SB_FLOAT_OP);
-  SET(D3D10_SB_OPCODE_AND, "and", 3, 0x06, D3D10_SB_BIT_OP);
-  SET(D3D10_SB_OPCODE_BREAK, "break", 0, 0x00, D3D10_SB_FLOW_OP);
-  SET(D3D10_SB_OPCODE_BREAKC, "breakc", 1, 0x00, D3D10_SB_FLOW_OP);
-  SET(D3D10_SB_OPCODE_CALL, "call", 1, 0x00, D3D10_SB_FLOW_OP);
-  SET(D3D10_SB_OPCODE_CALLC, "callc", 2, 0x00, D3D10_SB_FLOW_OP);
-  SET(D3D10_SB_OPCODE_CONTINUE, "continue", 0, 0x00, D3D10_SB_FLOW_OP);
-  SET(D3D10_SB_OPCODE_CONTINUEC, "continuec", 1, 0x00, D3D10_SB_FLOW_OP);
-  SET(D3D10_SB_OPCODE_CASE, "case", 1, 0x00, D3D10_SB_FLOW_OP);
-  SET(D3D10_SB_OPCODE_CUT, "cut", 0, 0x00, D3D10_SB_FLOW_OP);
-  SET(D3D10_SB_OPCODE_DEFAULT, "default", 0, 0x00, D3D10_SB_FLOW_OP);
-  SET(D3D10_SB_OPCODE_DISCARD, "discard", 1, 0x00, D3D10_SB_FLOW_OP);
-  SET(D3D10_SB_OPCODE_DIV, "div", 3, 0x06, D3D10_SB_FLOAT_OP);
- 
SET(D3D10_SB_OPCODE_DP2, "dp2", 3, 0x06, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_DP3, "dp3", 3, 0x06, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_DP4, "dp4", 3, 0x06, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_ELSE, "else", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_EMIT, "emit", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_EMITTHENCUT, "emit_then_cut", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_ENDIF, "endif", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_ENDLOOP, "endloop", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_ENDSWITCH, "endswitch", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_EQ, "eq", 3, 0x00, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_EXP, "exp", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_FRC, "frc", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_FTOI, "ftoi", 2, 0x00, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_FTOU, "ftou", 2, 0x00, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_GE, "ge", 3, 0x00, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_DERIV_RTX, "deriv_rtx", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_DERIV_RTY, "deriv_rty", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_IADD, "iadd", 3, 0x06, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_IF, "if", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_IEQ, "ieq", 3, 0x00, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_IGE, "ige", 3, 0x00, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_ILT, "ilt", 3, 0x00, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_IMAD, "imad", 4, 0x0e, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_IMAX, "imax", 3, 0x06, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_IMIN, "imin", 3, 0x06, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_IMUL, "imul", 4, 0x0c, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_INE, "ine", 3, 0x00, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_INEG, "ineg", 2, 0x02, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_ISHL, "ishl", 3, 0x02, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_ISHR, "ishr", 3, 0x02, D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_ITOF, "itof", 2, 0x00, 
D3D10_SB_INT_OP); - SET(D3D10_SB_OPCODE_LABEL, "label", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_LD, "ld", 3, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_LD_MS, "ldms", 4, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_LOG, "log", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_LOOP, "loop", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_LT, "lt", 3, 0x00, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_MAD, "mad", 4, 0x0e, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_MAX, "max", 3, 0x06, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_MIN, "min", 3, 0x06, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_MOV, "mov", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_MOVC, "movc", 4, 0x0c, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_MUL, "mul", 3, 0x06, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_NE, "ne", 3, 0x00, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_NOP, "nop", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_NOT, "not", 2, 0x02, D3D10_SB_BIT_OP); - SET(D3D10_SB_OPCODE_OR, "or", 3, 0x06, D3D10_SB_BIT_OP); - SET(D3D10_SB_OPCODE_RESINFO, "resinfo", 3, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_RET, "ret", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_RETC, "retc", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_ROUND_NE, "round_ne", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_ROUND_NI, "round_ni", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_ROUND_PI, "round_pi", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_ROUND_Z, "round_z", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_RSQ, "rsq", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_SAMPLE, "sample", 4, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_SAMPLE_B, "sample_b", 5, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_SAMPLE_L, "sample_l", 5, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_SAMPLE_D, "sample_d", 6, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_SAMPLE_C, "sample_c", 5, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_SB_OPCODE_SAMPLE_C_LZ, "sample_c_lz", 5, 0x00, D3D10_SB_TEX_OP); - 
SET(D3D10_SB_OPCODE_SQRT, "sqrt", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_SWITCH, "switch", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_SINCOS, "sincos", 3, 0x04, D3D10_SB_FLOAT_OP); - SET(D3D10_SB_OPCODE_UDIV, "udiv", 4, 0x0c, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_ULT, "ult", 3, 0x00, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_UGE, "uge", 3, 0x00, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_UMAX, "umax", 3, 0x06, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_UMIN, "umin", 3, 0x06, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_UMUL, "umul", 4, 0x0c, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_UMAD, "umad", 4, 0x0e, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_USHR, "ushr", 3, 0x02, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_UTOF, "utof", 2, 0x00, D3D10_SB_UINT_OP); - SET(D3D10_SB_OPCODE_XOR, "xor", 3, 0x06, D3D10_SB_BIT_OP); - SET(D3D10_SB_OPCODE_RESERVED0, "jmp", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D10_SB_OPCODE_DCL_INPUT, "dcl_input", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_OUTPUT, "dcl_output", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_INPUT_SGV, "dcl_input_sgv", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_INPUT_PS_SGV, "dcl_input_ps_sgv", 1, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE, "dcl_inputprimitive", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, "dcl_outputtopology", 0, - 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT, "dcl_maxout", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_INPUT_PS, "dcl_input_ps", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER, "dcl_constantbuffer", 1, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_SAMPLER, "dcl_sampler", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_RESOURCE, "dcl_resource", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_INPUT_SIV, "dcl_input_siv", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_INPUT_PS_SIV, "dcl_input_ps_siv", 1, 0x00, - 
D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_OUTPUT_SIV, "dcl_output_siv", 1, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_OUTPUT_SGV, "dcl_output_sgv", 1, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_TEMPS, "dcl_temps", 0, 0x00, D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP, "dcl_indexableTemp", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_INDEX_RANGE, "dcl_indexrange", 1, 0x00, - D3D10_SB_DCL_OP); - SET(D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS, "dcl_globalFlags", 0, 0x00, - D3D10_SB_DCL_OP); - - SET(D3D10_1_SB_OPCODE_SAMPLE_INFO, "sampleinfo", 2, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_1_SB_OPCODE_SAMPLE_POS, "samplepos", 3, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_1_SB_OPCODE_GATHER4, "gather4", 4, 0x00, D3D10_SB_TEX_OP); - SET(D3D10_1_SB_OPCODE_LOD, "lod", 4, 0x00, D3D10_SB_TEX_OP); - - SET(D3D11_SB_OPCODE_EMIT_STREAM, "emit_stream", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D11_SB_OPCODE_CUT_STREAM, "cut_stream", 1, 0x00, D3D10_SB_FLOW_OP); - SET(D3D11_SB_OPCODE_EMITTHENCUT_STREAM, "emit_then_cut_stream", 1, 0x00, - D3D10_SB_FLOW_OP); - SET(D3D11_SB_OPCODE_INTERFACE_CALL, "fcall", 1, 0x00, D3D10_SB_FLOW_OP); - - SET(D3D11_SB_OPCODE_DCL_STREAM, "dcl_stream", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_FUNCTION_BODY, "dcl_function_body", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_FUNCTION_TABLE, "dcl_function_table", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_INTERFACE, "dcl_interface", 0, 0x00, D3D10_SB_DCL_OP); - - SET(D3D11_SB_OPCODE_BUFINFO, "bufinfo", 2, 0x00, D3D10_SB_TEX_OP); - SET(D3D11_SB_OPCODE_DERIV_RTX_COARSE, "deriv_rtx_coarse", 2, 0x02, - D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_DERIV_RTX_FINE, "deriv_rtx_fine", 2, 0x02, - D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_DERIV_RTY_COARSE, "deriv_rty_coarse", 2, 0x02, - D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_DERIV_RTY_FINE, "deriv_rty_fine", 2, 0x02, - D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_GATHER4_C, "gather4_c", 5, 0x00, D3D10_SB_TEX_OP); - 
SET(D3D11_SB_OPCODE_GATHER4_PO, "gather4_po", 5, 0x00, D3D10_SB_TEX_OP); - SET(D3D11_SB_OPCODE_GATHER4_PO_C, "gather4_po_c", 6, 0x00, D3D10_SB_TEX_OP); - SET(D3D11_SB_OPCODE_RCP, "rcp", 2, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_F32TOF16, "f32tof16", 2, 0x00, D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_F16TOF32, "f16tof32", 2, 0x00, D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_UADDC, "uaddc", 4, 0x0c, D3D10_SB_UINT_OP); - SET(D3D11_SB_OPCODE_USUBB, "usubb", 4, 0x0c, D3D10_SB_UINT_OP); - SET(D3D11_SB_OPCODE_COUNTBITS, "countbits", 2, 0x02, D3D10_SB_BIT_OP); - SET(D3D11_SB_OPCODE_FIRSTBIT_HI, "firstbit_hi", 2, 0x02, D3D10_SB_BIT_OP); - SET(D3D11_SB_OPCODE_FIRSTBIT_LO, "firstbit_lo", 2, 0x02, D3D10_SB_BIT_OP); - SET(D3D11_SB_OPCODE_FIRSTBIT_SHI, "firstbit_shi", 2, 0x02, D3D10_SB_BIT_OP); - SET(D3D11_SB_OPCODE_UBFE, "ubfe", 4, 0x02, D3D10_SB_BIT_OP); - SET(D3D11_SB_OPCODE_IBFE, "ibfe", 4, 0x02, D3D10_SB_BIT_OP); - SET(D3D11_SB_OPCODE_BFI, "bfi", 5, 0x02, D3D10_SB_BIT_OP); - SET(D3D11_SB_OPCODE_BFREV, "bfrev", 2, 0x02, D3D10_SB_BIT_OP); - SET(D3D11_SB_OPCODE_SWAPC, "swapc", 5, 0x02, D3D10_SB_FLOAT_OP); - - SET(D3D11_SB_OPCODE_HS_DECLS, "hs_decls", 0, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_HS_CONTROL_POINT_PHASE, "hs_control_point_phase", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_HS_FORK_PHASE, "hs_fork_phase", 0, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_HS_JOIN_PHASE, "hs_join_phase", 0, 0x00, D3D10_SB_DCL_OP); - - SET(D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT, - "dcl_input_control_point_count", 0, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT, - "dcl_output_control_point_count", 0, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_TESS_DOMAIN, "dcl_tessellator_domain", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_TESS_PARTITIONING, "dcl_tessellator_partitioning", 0, - 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE, - "dcl_tessellator_output_primitive", 0, 0x00, 
D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR, "dcl_hs_max_tessfactor", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT, - "dcl_hs_fork_phase_instance_count", 0, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, - "dcl_hs_join_phase_instance_count", 0, 0x00, D3D10_SB_DCL_OP); - - SET(D3D11_SB_OPCODE_DCL_THREAD_GROUP, "dcl_thread_group", 0, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED, "dcl_uav_typed", 1, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW, "dcl_uav_raw", 1, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED, - "dcl_uav_structured", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW, "dcl_tgsm_raw", 1, - 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED, - "dcl_tgsm_structured", 1, 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_RESOURCE_RAW, "dcl_resource_raw", 1, 0x00, - D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED, "dcl_resource_structured", 1, - 0x00, D3D10_SB_DCL_OP); - SET(D3D11_SB_OPCODE_LD_UAV_TYPED, "ld_uav_typed", 3, 0x00, D3D11_SB_MEM_OP); - SET(D3D11_SB_OPCODE_STORE_UAV_TYPED, "store_uav_typed", 3, 0x00, - D3D11_SB_MEM_OP); - SET(D3D11_SB_OPCODE_LD_RAW, "ld_raw", 3, 0x00, D3D11_SB_MEM_OP); - SET(D3D11_SB_OPCODE_STORE_RAW, "store_raw", 3, 0x00, D3D11_SB_MEM_OP); - SET(D3D11_SB_OPCODE_LD_STRUCTURED, "ld_structured", 4, 0x00, D3D11_SB_MEM_OP); - SET(D3D11_SB_OPCODE_STORE_STRUCTURED, "store_structured", 4, 0x00, - D3D11_SB_MEM_OP); - SET(D3D11_SB_OPCODE_ATOMIC_AND, "atomic_and", 3, 0x00, D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_ATOMIC_OR, "atomic_or", 3, 0x00, D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_ATOMIC_XOR, "atomic_xor", 3, 0x00, D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_ATOMIC_CMP_STORE, "atomic_cmp_store", 4, 0x00, - D3D11_SB_ATOMIC_OP); - 
SET(D3D11_SB_OPCODE_ATOMIC_IADD, "atomic_iadd", 3, 0x00, D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_ATOMIC_IMAX, "atomic_imax", 3, 0x00, D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_ATOMIC_IMIN, "atomic_imin", 3, 0x00, D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_ATOMIC_UMAX, "atomic_umax", 3, 0x00, D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_ATOMIC_UMIN, "atomic_umin", 3, 0x00, D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_ALLOC, "imm_atomic_alloc", 2, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_CONSUME, "imm_atomic_consume", 2, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_IADD, "imm_atomic_iadd", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_AND, "imm_atomic_and", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_OR, "imm_atomic_or", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_XOR, "imm_atomic_xor", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_EXCH, "imm_atomic_exch", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_CMP_EXCH, "imm_atomic_cmp_exch", 5, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_IMAX, "imm_atomic_imax", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_IMIN, "imm_atomic_imin", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_UMAX, "imm_atomic_umax", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_IMM_ATOMIC_UMIN, "imm_atomic_umin", 4, 0x00, - D3D11_SB_ATOMIC_OP); - SET(D3D11_SB_OPCODE_SYNC, "sync", 0, 0x00, D3D10_SB_FLOW_OP); - SET(D3D11_SB_OPCODE_EVAL_SNAPPED, "eval_snapped", 3, 0x02, D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_EVAL_SAMPLE_INDEX, "eval_sample_index", 3, 0x02, - D3D10_SB_FLOAT_OP); - SET(D3D11_SB_OPCODE_EVAL_CENTROID, "eval_centroid", 2, 0x02, - D3D10_SB_FLOAT_OP); - - SET(D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT, "dcl_gsinstances", 0, 0x00, - D3D10_SB_DCL_OP); - - SET(D3D11_SB_OPCODE_DADD, "dadd", 3, 0x06, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DMAX, 
"dmax", 3, 0x06, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DMIN, "dmin", 3, 0x06, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DMUL, "dmul", 3, 0x06, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DEQ, "deq", 3, 0x00, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DGE, "dge", 3, 0x00, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DLT, "dlt", 3, 0x00, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DNE, "dne", 3, 0x00, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DMOV, "dmov", 2, 0x02, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DMOVC, "dmovc", 4, 0x0c, D3D11_SB_DOUBLE_OP); - SET(D3D11_SB_OPCODE_DTOF, "dtof", 2, 0x02, D3D11_SB_DOUBLE_TO_FLOAT_OP); - SET(D3D11_SB_OPCODE_FTOD, "ftod", 2, 0x00, D3D11_SB_FLOAT_TO_DOUBLE_OP); - - SET(D3D11_SB_OPCODE_ABORT, "abort", 0, 0x00, D3D11_SB_DEBUG_OP); - SET(D3D11_SB_OPCODE_DEBUG_BREAK, "debug_break", 0, 0x00, D3D11_SB_DEBUG_OP); - - SET(D3D11_1_SB_OPCODE_DDIV, "ddiv", 3, 0x06, D3D11_SB_DOUBLE_OP); - SET(D3D11_1_SB_OPCODE_DFMA, "dfma", 4, 0x0e, D3D11_SB_DOUBLE_OP); - SET(D3D11_1_SB_OPCODE_DRCP, "drcp", 2, 0x02, D3D11_SB_DOUBLE_OP); - - SET(D3D11_1_SB_OPCODE_MSAD, "msad", 4, 0x0e, D3D10_SB_UINT_OP); - - SET(D3D11_1_SB_OPCODE_DTOI, "dtoi", 2, 0x00, D3D11_SB_DOUBLE_OP); - SET(D3D11_1_SB_OPCODE_DTOU, "dtou", 2, 0x00, D3D11_SB_DOUBLE_OP); - SET(D3D11_1_SB_OPCODE_ITOD, "itod", 2, 0x00, D3D10_SB_INT_OP); - SET(D3D11_1_SB_OPCODE_UTOD, "utod", 2, 0x00, D3D10_SB_UINT_OP); - - SET(D3DWDDM1_3_SB_OPCODE_GATHER4_FEEDBACK, "gather4_s", 5, 0x00, - D3D10_SB_TEX_OP); - SET(D3DWDDM1_3_SB_OPCODE_GATHER4_C_FEEDBACK, "gather4_c_s", 6, 0x00, - D3D10_SB_TEX_OP); - SET(D3DWDDM1_3_SB_OPCODE_GATHER4_PO_FEEDBACK, "gather4_po_s", 6, 0x00, - D3D10_SB_TEX_OP); - SET(D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK, "gather4_po_c_s", 7, 0x00, - D3D10_SB_TEX_OP); - SET(D3DWDDM1_3_SB_OPCODE_LD_FEEDBACK, "ld_s", 4, 0x00, D3D10_SB_TEX_OP); - SET(D3DWDDM1_3_SB_OPCODE_LD_MS_FEEDBACK, "ldms_s", 5, 0x00, D3D10_SB_TEX_OP); - SET(D3DWDDM1_3_SB_OPCODE_LD_UAV_TYPED_FEEDBACK, "ld_uav_typed_s", 4, 
0x00,
-      D3D11_SB_MEM_OP);
-  SET(D3DWDDM1_3_SB_OPCODE_LD_RAW_FEEDBACK, "ld_raw_s", 4, 0x00,
-      D3D11_SB_MEM_OP);
-  SET(D3DWDDM1_3_SB_OPCODE_LD_STRUCTURED_FEEDBACK, "ld_structured_s", 5, 0x00,
-      D3D11_SB_MEM_OP);
-  SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_L_FEEDBACK, "sample_l_s", 6, 0x00,
-      D3D10_SB_TEX_OP);
-  SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK, "sample_c_lz_s", 6, 0x00,
-      D3D10_SB_TEX_OP);
-  SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_CLAMP_FEEDBACK, "sample_cl_s", 6, 0x00,
-      D3D10_SB_TEX_OP);
-  SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_B_CLAMP_FEEDBACK, "sample_b_cl_s", 7, 0x00,
-      D3D10_SB_TEX_OP);
-  SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_D_CLAMP_FEEDBACK, "sample_d_cl_s", 8, 0x00,
-      D3D10_SB_TEX_OP);
-  SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_C_CLAMP_FEEDBACK, "sample_c_cl_s", 7, 0x00,
-      D3D10_SB_TEX_OP);
-  SET(D3DWDDM1_3_SB_OPCODE_CHECK_ACCESS_FULLY_MAPPED,
-      "check_access_fully_mapped", 2, 0x00, D3D10_SB_TEX_OP);
-  // clang-format on
+#include "InstrInfo.def"
+#undef SET
 }
 
 struct InstructionModifier {
diff --git a/mlir/lib/Target/DXSA/BinaryWriter.cpp b/mlir/lib/Target/DXSA/BinaryWriter.cpp
index 4c19d41a7790..728f7865594f 100644
--- a/mlir/lib/Target/DXSA/BinaryWriter.cpp
+++ b/mlir/lib/Target/DXSA/BinaryWriter.cpp
@@ -5,6 +5,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/DebugLog.h"
 #include "llvm/Support/EndianStream.h"
@@ -17,10 +18,323 @@
 using namespace mlir;
 using namespace llvm;
 
+// Maps an instruction mnemonic to the opcode value stored in the opcode
+// token.
+using OpcodeMap = llvm::DenseMap<StringRef, uint32_t>;
+
+// Fill `opcodes` from InstrInfo.def, the instruction table that the binary
+// parser also includes. Only the mnemonic and the opcode columns are used.
+static void initOpcodeMap(OpcodeMap &opcodes) {
+#define SET(OpCode, Name, NumOperands, PrecMask, OpClass)                      \
+  opcodes[Name] = OpCode;
+#include "InstrInfo.def"
+#undef SET
+}
+
+// Return the representation that the operand token records for the index
+// defined by `op`. Emits an error and fails when `op` is not an index
+// operation.
+static FailureOr<D3D10_SB_OPERAND_INDEX_REPRESENTATION>
+getIndexRepresentation(Operation *op) {
+  return llvm::TypeSwitch<Operation &,
+                          FailureOr<D3D10_SB_OPERAND_INDEX_REPRESENTATION>>(*op)
+      .Case<dxsa::IndexImm>([](auto imm) {
+        auto attr = cast<IntegerAttr>(imm.getImm());
+        auto type = cast<IntegerType>(attr.getType());
+        if (type.getWidth() == 32) {
+          return D3D10_SB_OPERAND_INDEX_IMMEDIATE32;
+        }
+        assert(type.getWidth() == 64 && "invalid index type");
+        return D3D10_SB_OPERAND_INDEX_IMMEDIATE64;
+      })
+      .Case<dxsa::IndexRel>(
+          [](auto) { return D3D10_SB_OPERAND_INDEX_RELATIVE; })
+      .Case<dxsa::IndexRelImm>([](auto) {
+        return D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
+      })
+      .Default([](auto &other) {
+        return emitError(other.getLoc(), "invalid index type");
+      });
+}
+
+// Serializes the instructions of a DXSA module as a stream of little-endian
+// 32 bit tokens.
+class Writer {
+public:
+  explicit Writer(raw_ostream &output) : output(output, endianness::little) {
+    initOpcodeMap(opcodeMap);
+  }
+
+  // Emit every instruction found directly in the module body. Stops at the
+  // first instruction that cannot be encoded.
+  LogicalResult emitModule(ModuleOp source) {
+    for (auto &op : *source.getBody()) {
+      // Only handle instructions. Skip operands and indices - they
+      // are emitted by emitInstruction for instructions that use
+      // them.
+      if (auto inst = dyn_cast<dxsa::Instruction>(op)) {
+        if (failed(emitInstruction(inst))) {
+          return failure();
+        }
+      }
+    }
+    return success();
+  }
+
+  // Emit an instruction and all its operands recursively.
+  // FIXME: add extended instructions
+  LogicalResult emitInstruction(dxsa::Instruction inst) {
+    // Buffer all tokens for an instruction, so we can fixup
+    // instruction length before emitting tokens to the output.
+    buffer.clear();
+
+    auto opcodeIt = opcodeMap.find(inst.getMnemonic());
+    if (opcodeIt == opcodeMap.end()) {
+      return emitError(inst.getLoc(), "unknown mnemonic");
+    }
+
+    // First token is an opcode and length. Length is unknown until we
+    // process all operands.
+    uint32_t opcode = opcodeIt->second;
+    uint32_t token = ENCODE_D3D10_SB_OPCODE_TYPE(opcode);
+    buffer.push_back(token);
+
+    for (Value value : inst.getOperands()) {
+      Operation *op = value.getDefiningOp();
+      assert(op && "undefined operand");
+
+      auto result =
+          llvm::TypeSwitch<Operation &, LogicalResult>(*op)
+              .Case<dxsa::Operand>(
+                  [this](auto operand) { return emitOperand(operand); })
+              .Case<dxsa::OperandImm>(
+                  [this](auto operand) { return emitOperandImm(operand); })
+              .Default([](auto &other) {
+                return emitError(other.getLoc(), "unexpected operand kind");
+              });
+      if (failed(result)) {
+        return result;
+      }
+    }
+
+    // The length field of the opcode token is narrower than the token itself.
+    // Report an instruction that does not fit instead of emitting a token
+    // with a truncated length.
+    constexpr size_t maxLength = D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH_MASK >>
+                                 D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH_SHIFT;
+    if (buffer.size() > maxLength) {
+      return emitError(inst.getLoc(), "instruction is too long");
+    }
+
+    // Fixup instruction length after all operands are accumulated in
+    // the buffer.
+    buffer[0] |= ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(buffer.size());
+    for (uint32_t word : buffer) {
+      output.write(word);
+    }
+
+    return success();
+  }
+
+  // Emit an operand and all its indices recursively.
+  LogicalResult emitOperand(dxsa::Operand op) {
+    uint32_t token = ENCODE_D3D10_SB_OPERAND_TYPE(op.getType());
+
+    // Encode swizzle, mask, or one component selection.
+    switch (op.getNumComponents()) {
+    case 0:
+      token |=
+          ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_0_COMPONENT);
+      break;
+    case 1:
+      token |=
+          ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_1_COMPONENT);
+      break;
+    case 4:
+      token |=
+          ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_4_COMPONENT);
+      // The selection modes are tried in a fixed order: mask, then swizzle,
+      // then single component select. Only the first one present is encoded.
+      if (auto mask = op.getMask()) {
+        token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(
+            D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE);
+        token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_MASK(*mask);
+      } else if (auto swizzle = op.getSwizzle()) {
+        SmallVector<uint32_t> values;
+        for (const APInt &v : *swizzle) {
+          values.push_back(v.getZExtValue());
+        }
+        assert(values.size() == 4 && "invalid number of swizzle values");
+        token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(
+            D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE);
+        token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(
+            values[0], values[1], values[2], values[3]);
+      } else if (auto one = op.getOne()) {
+        token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(
+            D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE);
+        token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECT_1(*one);
+      }
+      break;
+    default:
+      return emitError(op.getLoc(), "invalid number of components");
+    }
+
+    // The operand token can describe at most three indices.
+    unsigned numIndices = op.getNumOperands();
+    if (numIndices > static_cast<unsigned>(D3D10_SB_OPERAND_INDEX_3D)) {
+      return emitError(op.getLoc(), "operand has too many indices");
+    }
+
+    // Operand token encodes types and number of indices that follow
+    // it.
+    token |= ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(numIndices);
+    uint32_t dim = 0;
+    for (Value value : op.getOperands()) {
+      Operation *index = value.getDefiningOp();
+      assert(index && "undefined index");
+
+      auto repr = getIndexRepresentation(index);
+      if (failed(repr)) {
+        return failure();
+      }
+      token |= ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(dim, *repr);
+      dim += 1;
+    }
+
+    buffer.push_back(token);
+
+    // Indices follow the operand token.
+    for (Value value : op.getOperands()) {
+      Operation *index = value.getDefiningOp();
+      assert(index && "undefined index");
+
+      auto result =
+          llvm::TypeSwitch<Operation &, LogicalResult>(*index)
+              .Case<dxsa::IndexImm>(
+                  [this](auto &imm) { return emitIndexImm(imm); })
+              .Case<dxsa::IndexRel>(
+                  [this](auto &rel) { return emitIndexRel(rel); })
+              .Case<dxsa::IndexRelImm>(
+                  [this](auto &rel) { return emitIndexRelImm(rel); })
+              .Default([](auto &other) {
+                return emitError(other.getLoc(), "invalid index type");
+              });
+
+      if (failed(result)) {
+        return result;
+      }
+    }
+
+    return success();
+  }
+
+  // Emit an immediate operand. Unlike register operands, immediate
+  // operands do not have indices. They are encoded as an operand
+  // followed by N immediate values for each component.
+  LogicalResult emitOperandImm(dxsa::OperandImm op) {
+    auto attr = cast<DenseIntElementsAttr>(op.getImm());
+
+    uint32_t token = 0;
+
+    auto elementType = cast<IntegerType>(attr.getType().getElementType());
+    if (elementType.getWidth() == 32) {
+      token |= ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_IMMEDIATE32);
+    } else {
+      assert(elementType.getWidth() == 64 && "invalid immediate");
+      token |= ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_IMMEDIATE64);
+    }
+
+    // The component count is the number of immediate values, not the number
+    // of tokens they occupy: a 64 bit value is split into two tokens below
+    // but is still a single component.
+    // NOTE(review): this matches OperandImm::verify, which accepts 1 or 4
+    // values for either width - confirm the parser reads 64 bit immediates
+    // the same way.
+    switch (attr.getNumElements()) {
+    case 1:
+      token |=
+          ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_1_COMPONENT);
+      break;
+    case 4:
+      token |=
+          ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_4_COMPONENT);
+      break;
+    default:
+      return emitError(
+          op.getLoc(), "immediate operand should be either 1- or 4- component");
+    }
+    buffer.push_back(token);
+
+    // Split immediates into tokens. 32 bit immediate values are
+    // encoded as is, and 64 bit immediates are split into high and
+    // low 32 bit parts.
+    for (const APInt &v : attr) {
+      uint64_t bits = v.getZExtValue();
+      if (v.getBitWidth() == 64) {
+        buffer.push_back(static_cast<uint32_t>(bits >> 32));
+      }
+      buffer.push_back(static_cast<uint32_t>(bits));
+    }
+
+    return success();
+  }
+
+  // Emit an immediate index. Its type is encoded into the operand, so
+  // here we only emit the value as tokens.
+  LogicalResult emitIndexImm(dxsa::IndexImm op) {
+    auto attr = cast<IntegerAttr>(op.getImm());
+
+    uint64_t value = attr.getInt();
+    if (attr.getType().isInteger(32)) {
+      buffer.push_back(static_cast<uint32_t>(value));
+      return success();
+    }
+
+    assert(attr.getType().isInteger(64) &&
+           "invalid type of an immediate index");
+
+    // A 64 bit index is emitted as two tokens, high part first.
+    buffer.push_back(static_cast<uint32_t>(value >> 32));
+    buffer.push_back(static_cast<uint32_t>(value));
+    return success();
+  }
+
+  // Emit an operand used as an index.
+  LogicalResult emitIndexRel(dxsa::IndexRel index) {
+    // Recursively emit an operand, which may also have other indices.
+    return emitIndexOperand(index.getOperand(), index.getLoc());
+  }
+
+  // Emit an index as an operand + a 32 bit immediate offset.
+  LogicalResult emitIndexRelImm(dxsa::IndexRelImm index) {
+    if (failed(emitIndexOperand(index.getOperand(), index.getLoc()))) {
+      return failure();
+    }
+
+    buffer.push_back(index.getImm());
+    return success();
+  }
+
+private:
+  // Emit the register operand that a relative index refers to. Fails with a
+  // diagnostic at `loc` when `value` is not produced by a dxsa::Operand, for
+  // example when it comes from an immediate operand.
+  LogicalResult emitIndexOperand(Value value, Location loc) {
+    auto operand = value.getDefiningOp<dxsa::Operand>();
+    if (!operand) {
+      return emitError(loc, "relative index must refer to a register operand");
+    }
+    return emitOperand(operand);
+  }
+
+  std::vector<uint32_t> buffer;
+  support::endian::Writer output;
+  OpcodeMap opcodeMap;
+};
+
 namespace mlir::dxsa {
 LogicalResult exportModuleToDxsaBinary(ModuleOp source, raw_ostream &output) {
-  Region &region = source.getRegion();
-  assert(region.hasOneBlock() && "invalid module");
-  return failure();
+  Writer writer(output);
+  return writer.emitModule(source);
 }
 } // namespace mlir::dxsa
diff --git a/mlir/lib/Target/DXSA/InstrInfo.def b/mlir/lib/Target/DXSA/InstrInfo.def
new file mode 100644
index 000000000000..35a36db3285e
--- /dev/null
+++ b/mlir/lib/Target/DXSA/InstrInfo.def
@@ -0,0 +1,312 @@
+SET(D3D10_SB_OPCODE_ADD, "add", 3, 0x06, D3D10_SB_FLOAT_OP);
+SET(D3D10_SB_OPCODE_AND, "and", 3, 0x06, D3D10_SB_BIT_OP);
+SET(D3D10_SB_OPCODE_BREAK, "break", 0, 0x00, D3D10_SB_FLOW_OP);
+SET(D3D10_SB_OPCODE_BREAKC, "breakc", 1, 0x00, D3D10_SB_FLOW_OP);
+SET(D3D10_SB_OPCODE_CALL, "call", 1, 0x00, D3D10_SB_FLOW_OP);
+SET(D3D10_SB_OPCODE_CALLC, "callc", 2, 0x00, D3D10_SB_FLOW_OP);
+SET(D3D10_SB_OPCODE_CONTINUE, "continue", 0, 0x00,
D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_CONTINUEC, "continuec", 1, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_CASE, "case", 1, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_CUT, "cut", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_DEFAULT, "default", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_DISCARD, "discard", 1, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_DIV, "div", 3, 0x06, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_DP2, "dp2", 3, 0x06, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_DP3, "dp3", 3, 0x06, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_DP4, "dp4", 3, 0x06, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_ELSE, "else", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_EMIT, "emit", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_EMITTHENCUT, "emit_then_cut", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_ENDIF, "endif", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_ENDLOOP, "endloop", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_ENDSWITCH, "endswitch", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_EQ, "eq", 3, 0x00, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_EXP, "exp", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_FRC, "frc", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_FTOI, "ftoi", 2, 0x00, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_FTOU, "ftou", 2, 0x00, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_GE, "ge", 3, 0x00, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_DERIV_RTX, "deriv_rtx", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_DERIV_RTY, "deriv_rty", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_IADD, "iadd", 3, 0x06, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_IF, "if", 1, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_IEQ, "ieq", 3, 0x00, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_IGE, "ige", 3, 0x00, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_ILT, "ilt", 3, 0x00, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_IMAD, "imad", 4, 0x0e, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_IMAX, "imax", 3, 0x06, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_IMIN, "imin", 3, 
0x06, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_IMUL, "imul", 4, 0x0c, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_INE, "ine", 3, 0x00, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_INEG, "ineg", 2, 0x02, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_ISHL, "ishl", 3, 0x02, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_ISHR, "ishr", 3, 0x02, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_ITOF, "itof", 2, 0x00, D3D10_SB_INT_OP); +SET(D3D10_SB_OPCODE_LABEL, "label", 1, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_LD, "ld", 3, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_LD_MS, "ldms", 4, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_LOG, "log", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_LOOP, "loop", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_LT, "lt", 3, 0x00, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_MAD, "mad", 4, 0x0e, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_MAX, "max", 3, 0x06, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_MIN, "min", 3, 0x06, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_MOV, "mov", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_MOVC, "movc", 4, 0x0c, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_MUL, "mul", 3, 0x06, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_NE, "ne", 3, 0x00, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_NOP, "nop", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_NOT, "not", 2, 0x02, D3D10_SB_BIT_OP); +SET(D3D10_SB_OPCODE_OR, "or", 3, 0x06, D3D10_SB_BIT_OP); +SET(D3D10_SB_OPCODE_RESINFO, "resinfo", 3, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_RET, "ret", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_RETC, "retc", 1, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_ROUND_NE, "round_ne", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_ROUND_NI, "round_ni", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_ROUND_PI, "round_pi", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_ROUND_Z, "round_z", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_RSQ, "rsq", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_SAMPLE, "sample", 4, 0x00, D3D10_SB_TEX_OP); 
+SET(D3D10_SB_OPCODE_SAMPLE_B, "sample_b", 5, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_SAMPLE_L, "sample_l", 5, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_SAMPLE_D, "sample_d", 6, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_SAMPLE_C, "sample_c", 5, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_SAMPLE_C_LZ, "sample_c_lz", 5, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_SB_OPCODE_SQRT, "sqrt", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_SWITCH, "switch", 1, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_SINCOS, "sincos", 3, 0x04, D3D10_SB_FLOAT_OP); +SET(D3D10_SB_OPCODE_UDIV, "udiv", 4, 0x0c, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_ULT, "ult", 3, 0x00, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_UGE, "uge", 3, 0x00, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_UMAX, "umax", 3, 0x06, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_UMIN, "umin", 3, 0x06, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_UMUL, "umul", 4, 0x0c, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_UMAD, "umad", 4, 0x0e, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_USHR, "ushr", 3, 0x02, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_UTOF, "utof", 2, 0x00, D3D10_SB_UINT_OP); +SET(D3D10_SB_OPCODE_XOR, "xor", 3, 0x06, D3D10_SB_BIT_OP); +SET(D3D10_SB_OPCODE_RESERVED0, "jmp", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D10_SB_OPCODE_DCL_INPUT, "dcl_input", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_OUTPUT, "dcl_output", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_INPUT_SGV, "dcl_input_sgv", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_INPUT_PS_SGV, "dcl_input_ps_sgv", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE, "dcl_inputprimitive", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, "dcl_outputtopology", 0, + 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT, "dcl_maxout", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_INPUT_PS, "dcl_input_ps", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER, "dcl_constantbuffer", 1, 
0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_SAMPLER, "dcl_sampler", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_RESOURCE, "dcl_resource", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_INPUT_SIV, "dcl_input_siv", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_INPUT_PS_SIV, "dcl_input_ps_siv", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_OUTPUT_SIV, "dcl_output_siv", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_OUTPUT_SGV, "dcl_output_sgv", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_TEMPS, "dcl_temps", 0, 0x00, D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP, "dcl_indexableTemp", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_INDEX_RANGE, "dcl_indexrange", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS, "dcl_globalFlags", 0, 0x00, + D3D10_SB_DCL_OP); + +SET(D3D10_1_SB_OPCODE_SAMPLE_INFO, "sampleinfo", 2, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_1_SB_OPCODE_SAMPLE_POS, "samplepos", 3, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_1_SB_OPCODE_GATHER4, "gather4", 4, 0x00, D3D10_SB_TEX_OP); +SET(D3D10_1_SB_OPCODE_LOD, "lod", 4, 0x00, D3D10_SB_TEX_OP); + +SET(D3D11_SB_OPCODE_EMIT_STREAM, "emit_stream", 1, 0x00, D3D10_SB_FLOW_OP); +SET(D3D11_SB_OPCODE_CUT_STREAM, "cut_stream", 1, 0x00, D3D10_SB_FLOW_OP); +SET(D3D11_SB_OPCODE_EMITTHENCUT_STREAM, "emit_then_cut_stream", 1, 0x00, + D3D10_SB_FLOW_OP); +SET(D3D11_SB_OPCODE_INTERFACE_CALL, "fcall", 1, 0x00, D3D10_SB_FLOW_OP); + +SET(D3D11_SB_OPCODE_DCL_STREAM, "dcl_stream", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_FUNCTION_BODY, "dcl_function_body", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_FUNCTION_TABLE, "dcl_function_table", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_INTERFACE, "dcl_interface", 0, 0x00, D3D10_SB_DCL_OP); + +SET(D3D11_SB_OPCODE_BUFINFO, "bufinfo", 2, 0x00, D3D10_SB_TEX_OP); +SET(D3D11_SB_OPCODE_DERIV_RTX_COARSE, "deriv_rtx_coarse", 2, 0x02, + D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_DERIV_RTX_FINE, 
"deriv_rtx_fine", 2, 0x02, + D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_DERIV_RTY_COARSE, "deriv_rty_coarse", 2, 0x02, + D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_DERIV_RTY_FINE, "deriv_rty_fine", 2, 0x02, + D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_GATHER4_C, "gather4_c", 5, 0x00, D3D10_SB_TEX_OP); +SET(D3D11_SB_OPCODE_GATHER4_PO, "gather4_po", 5, 0x00, D3D10_SB_TEX_OP); +SET(D3D11_SB_OPCODE_GATHER4_PO_C, "gather4_po_c", 6, 0x00, D3D10_SB_TEX_OP); +SET(D3D11_SB_OPCODE_RCP, "rcp", 2, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_F32TOF16, "f32tof16", 2, 0x00, D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_F16TOF32, "f16tof32", 2, 0x00, D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_UADDC, "uaddc", 4, 0x0c, D3D10_SB_UINT_OP); +SET(D3D11_SB_OPCODE_USUBB, "usubb", 4, 0x0c, D3D10_SB_UINT_OP); +SET(D3D11_SB_OPCODE_COUNTBITS, "countbits", 2, 0x02, D3D10_SB_BIT_OP); +SET(D3D11_SB_OPCODE_FIRSTBIT_HI, "firstbit_hi", 2, 0x02, D3D10_SB_BIT_OP); +SET(D3D11_SB_OPCODE_FIRSTBIT_LO, "firstbit_lo", 2, 0x02, D3D10_SB_BIT_OP); +SET(D3D11_SB_OPCODE_FIRSTBIT_SHI, "firstbit_shi", 2, 0x02, D3D10_SB_BIT_OP); +SET(D3D11_SB_OPCODE_UBFE, "ubfe", 4, 0x02, D3D10_SB_BIT_OP); +SET(D3D11_SB_OPCODE_IBFE, "ibfe", 4, 0x02, D3D10_SB_BIT_OP); +SET(D3D11_SB_OPCODE_BFI, "bfi", 5, 0x02, D3D10_SB_BIT_OP); +SET(D3D11_SB_OPCODE_BFREV, "bfrev", 2, 0x02, D3D10_SB_BIT_OP); +SET(D3D11_SB_OPCODE_SWAPC, "swapc", 5, 0x02, D3D10_SB_FLOAT_OP); + +SET(D3D11_SB_OPCODE_HS_DECLS, "hs_decls", 0, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_HS_CONTROL_POINT_PHASE, "hs_control_point_phase", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_HS_FORK_PHASE, "hs_fork_phase", 0, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_HS_JOIN_PHASE, "hs_join_phase", 0, 0x00, D3D10_SB_DCL_OP); + +SET(D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT, + "dcl_input_control_point_count", 0, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT, + "dcl_output_control_point_count", 0, 0x00, D3D10_SB_DCL_OP); 
+SET(D3D11_SB_OPCODE_DCL_TESS_DOMAIN, "dcl_tessellator_domain", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_TESS_PARTITIONING, "dcl_tessellator_partitioning", 0, + 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE, + "dcl_tessellator_output_primitive", 0, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR, "dcl_hs_max_tessfactor", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT, + "dcl_hs_fork_phase_instance_count", 0, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, + "dcl_hs_join_phase_instance_count", 0, 0x00, D3D10_SB_DCL_OP); + +SET(D3D11_SB_OPCODE_DCL_THREAD_GROUP, "dcl_thread_group", 0, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED, "dcl_uav_typed", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW, "dcl_uav_raw", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED, + "dcl_uav_structured", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW, "dcl_tgsm_raw", 1, + 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED, + "dcl_tgsm_structured", 1, 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_RESOURCE_RAW, "dcl_resource_raw", 1, 0x00, + D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED, "dcl_resource_structured", 1, + 0x00, D3D10_SB_DCL_OP); +SET(D3D11_SB_OPCODE_LD_UAV_TYPED, "ld_uav_typed", 3, 0x00, D3D11_SB_MEM_OP); +SET(D3D11_SB_OPCODE_STORE_UAV_TYPED, "store_uav_typed", 3, 0x00, + D3D11_SB_MEM_OP); +SET(D3D11_SB_OPCODE_LD_RAW, "ld_raw", 3, 0x00, D3D11_SB_MEM_OP); +SET(D3D11_SB_OPCODE_STORE_RAW, "store_raw", 3, 0x00, D3D11_SB_MEM_OP); +SET(D3D11_SB_OPCODE_LD_STRUCTURED, "ld_structured", 4, 0x00, D3D11_SB_MEM_OP); +SET(D3D11_SB_OPCODE_STORE_STRUCTURED, "store_structured", 4, 0x00, + D3D11_SB_MEM_OP); +SET(D3D11_SB_OPCODE_ATOMIC_AND, "atomic_and", 3, 0x00, D3D11_SB_ATOMIC_OP); 
+SET(D3D11_SB_OPCODE_ATOMIC_OR, "atomic_or", 3, 0x00, D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_ATOMIC_XOR, "atomic_xor", 3, 0x00, D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_ATOMIC_CMP_STORE, "atomic_cmp_store", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_ATOMIC_IADD, "atomic_iadd", 3, 0x00, D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_ATOMIC_IMAX, "atomic_imax", 3, 0x00, D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_ATOMIC_IMIN, "atomic_imin", 3, 0x00, D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_ATOMIC_UMAX, "atomic_umax", 3, 0x00, D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_ATOMIC_UMIN, "atomic_umin", 3, 0x00, D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_ALLOC, "imm_atomic_alloc", 2, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_CONSUME, "imm_atomic_consume", 2, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_IADD, "imm_atomic_iadd", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_AND, "imm_atomic_and", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_OR, "imm_atomic_or", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_XOR, "imm_atomic_xor", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_EXCH, "imm_atomic_exch", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_CMP_EXCH, "imm_atomic_cmp_exch", 5, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_IMAX, "imm_atomic_imax", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_IMIN, "imm_atomic_imin", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_UMAX, "imm_atomic_umax", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_IMM_ATOMIC_UMIN, "imm_atomic_umin", 4, 0x00, + D3D11_SB_ATOMIC_OP); +SET(D3D11_SB_OPCODE_SYNC, "sync", 0, 0x00, D3D10_SB_FLOW_OP); +SET(D3D11_SB_OPCODE_EVAL_SNAPPED, "eval_snapped", 3, 0x02, D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_EVAL_SAMPLE_INDEX, "eval_sample_index", 3, 0x02, + D3D10_SB_FLOAT_OP); +SET(D3D11_SB_OPCODE_EVAL_CENTROID, 
"eval_centroid", 2, 0x02, + D3D10_SB_FLOAT_OP); + +SET(D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT, "dcl_gsinstances", 0, 0x00, + D3D10_SB_DCL_OP); + +SET(D3D11_SB_OPCODE_DADD, "dadd", 3, 0x06, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DMAX, "dmax", 3, 0x06, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DMIN, "dmin", 3, 0x06, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DMUL, "dmul", 3, 0x06, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DEQ, "deq", 3, 0x00, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DGE, "dge", 3, 0x00, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DLT, "dlt", 3, 0x00, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DNE, "dne", 3, 0x00, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DMOV, "dmov", 2, 0x02, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DMOVC, "dmovc", 4, 0x0c, D3D11_SB_DOUBLE_OP); +SET(D3D11_SB_OPCODE_DTOF, "dtof", 2, 0x02, D3D11_SB_DOUBLE_TO_FLOAT_OP); +SET(D3D11_SB_OPCODE_FTOD, "ftod", 2, 0x00, D3D11_SB_FLOAT_TO_DOUBLE_OP); + +SET(D3D11_SB_OPCODE_ABORT, "abort", 0, 0x00, D3D11_SB_DEBUG_OP); +SET(D3D11_SB_OPCODE_DEBUG_BREAK, "debug_break", 0, 0x00, D3D11_SB_DEBUG_OP); + +SET(D3D11_1_SB_OPCODE_DDIV, "ddiv", 3, 0x06, D3D11_SB_DOUBLE_OP); +SET(D3D11_1_SB_OPCODE_DFMA, "dfma", 4, 0x0e, D3D11_SB_DOUBLE_OP); +SET(D3D11_1_SB_OPCODE_DRCP, "drcp", 2, 0x02, D3D11_SB_DOUBLE_OP); + +SET(D3D11_1_SB_OPCODE_MSAD, "msad", 4, 0x0e, D3D10_SB_UINT_OP); + +SET(D3D11_1_SB_OPCODE_DTOI, "dtoi", 2, 0x00, D3D11_SB_DOUBLE_OP); +SET(D3D11_1_SB_OPCODE_DTOU, "dtou", 2, 0x00, D3D11_SB_DOUBLE_OP); +SET(D3D11_1_SB_OPCODE_ITOD, "itod", 2, 0x00, D3D10_SB_INT_OP); +SET(D3D11_1_SB_OPCODE_UTOD, "utod", 2, 0x00, D3D10_SB_UINT_OP); + +SET(D3DWDDM1_3_SB_OPCODE_GATHER4_FEEDBACK, "gather4_s", 5, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_GATHER4_C_FEEDBACK, "gather4_c_s", 6, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_GATHER4_PO_FEEDBACK, "gather4_po_s", 6, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK, "gather4_po_c_s", 7, 0x00, + D3D10_SB_TEX_OP); 
+SET(D3DWDDM1_3_SB_OPCODE_LD_FEEDBACK, "ld_s", 4, 0x00, D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_LD_MS_FEEDBACK, "ldms_s", 5, 0x00, D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_LD_UAV_TYPED_FEEDBACK, "ld_uav_typed_s", 4, 0x00, + D3D11_SB_MEM_OP); +SET(D3DWDDM1_3_SB_OPCODE_LD_RAW_FEEDBACK, "ld_raw_s", 4, 0x00, + D3D11_SB_MEM_OP); +SET(D3DWDDM1_3_SB_OPCODE_LD_STRUCTURED_FEEDBACK, "ld_structured_s", 5, 0x00, + D3D11_SB_MEM_OP); +SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_L_FEEDBACK, "sample_l_s", 6, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK, "sample_c_lz_s", 6, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_CLAMP_FEEDBACK, "sample_cl_s", 6, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_B_CLAMP_FEEDBACK, "sample_b_cl_s", 7, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_D_CLAMP_FEEDBACK, "sample_d_cl_s", 8, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_SAMPLE_C_CLAMP_FEEDBACK, "sample_c_cl_s", 7, 0x00, + D3D10_SB_TEX_OP); +SET(D3DWDDM1_3_SB_OPCODE_CHECK_ACCESS_FULLY_MAPPED, + "check_access_fully_mapped", 2, 0x00, D3D10_SB_TEX_OP); diff --git a/mlir/test/Target/DXSA/empty.mlir b/mlir/test/Target/DXSA/empty.mlir index 19d6cd55d36e..ed5a35ec9bfd 100644 --- a/mlir/test/Target/DXSA/empty.mlir +++ b/mlir/test/Target/DXSA/empty.mlir @@ -1,4 +1,8 @@ // RUN: mlir-translate --import-dxsa-bin | FileCheck %s +// RUN: mlir-translate --export-dxsa-bin %s -o - | mlir-translate --import-dxsa-bin - | FileCheck %s // CHECK: module { // CHECK-NEXT } + +module { +} diff --git a/mlir/test/Target/DXSA/mov-index.mlir b/mlir/test/Target/DXSA/mov-index.mlir index ff3ac88aa078..eb2323ebdb53 100644 --- a/mlir/test/Target/DXSA/mov-index.mlir +++ b/mlir/test/Target/DXSA/mov-index.mlir @@ -1,4 +1,8 @@ // RUN: mlir-translate --import-dxsa-bin %S/inputs/mov-index.bin | FileCheck %s +// RUN: mlir-translate --export-dxsa-bin %s -o %t.bin +// RUN: mlir-translate --import-dxsa-bin %t.bin | FileCheck %s +// RUN: diff %t.bin 
%S/inputs/mov-index.bin + // mov o0.xyzw, v[r0.x][0].xyzw // CHECK: module { @@ -11,3 +15,14 @@ // CHECK-NEXT: %6 = dxsa.operand %4, %5 {num_components = 4 : i32, swizzle = dense<[0, 1, 2, 3]> : vector<4xi32>, type = 1 : i32} // CHECK-NEXT: dxsa.instruction "mov" %1, %6 // CHECK-NEXT: } + +module { + %0 = dxsa.index.imm {imm = 0 : i32} + %1 = dxsa.operand %0 {mask = 240 : i32, num_components = 4 : i32, type = 2 : i32} + %2 = dxsa.index.imm {imm = 0 : i32} + %3 = dxsa.operand %2 {num_components = 4 : i32, one = 0 : i32, type = 0 : i32} + %4 = dxsa.index.rel %3 + %5 = dxsa.index.imm {imm = 0 : i32} + %6 = dxsa.operand %4, %5 {num_components = 4 : i32, swizzle = dense<[0, 1, 2, 3]> : vector<4xi32>, type = 1 : i32} + dxsa.instruction "mov" %1, %6 +} diff --git a/mlir/test/Target/DXSA/mov.mlir b/mlir/test/Target/DXSA/mov.mlir index 4e4bd9989df0..de2854b3e0a4 100644 --- a/mlir/test/Target/DXSA/mov.mlir +++ b/mlir/test/Target/DXSA/mov.mlir @@ -1,4 +1,8 @@ // RUN: mlir-translate --import-dxsa-bin %S/inputs/mov.bin | FileCheck %s +// RUN: mlir-translate --export-dxsa-bin %s -o %t.bin +// RUN: mlir-translate --import-dxsa-bin %t.bin | FileCheck %s +// RUN: diff %t.bin %S/inputs/mov.bin + // mov r0.x, l(3.000000) // CHECK: module { diff --git a/mlir/test/Target/DXSA/operand_components_invalid.mlir b/mlir/test/Target/DXSA/operand_components_invalid.mlir new file mode 100644 index 000000000000..99267e2aa913 --- /dev/null +++ b/mlir/test/Target/DXSA/operand_components_invalid.mlir @@ -0,0 +1,23 @@ +// RUN: mlir-opt %s -split-input-file -verify-diagnostics + +// OK +%0 = dxsa.operand {num_components = 0 : i32, type = 1 : i32} + +// OK +%1 = dxsa.operand {num_components = 1 : i32, type = 1 : i32} + +// expected-error@+1 {{invalid number of components}} +%2 = dxsa.operand {num_components = 2 : i32, type = 1 : i32} + +// ----- + +// expected-error@+1 {{invalid number of components}} +%3 = dxsa.operand {num_components = 3 : i32, type = 1 : i32} + +// OK +%4 = dxsa.operand 
{num_components = 4 : i32, type = 1 : i32} + +// ----- + +// expected-error@+1 {{invalid number of components}} +%5 = dxsa.operand {num_components = 5 : i32, type = 1 : i32} diff --git a/mlir/test/Target/DXSA/operand_imm_components_invalid.mlir b/mlir/test/Target/DXSA/operand_imm_components_invalid.mlir new file mode 100644 index 000000000000..539cff67a62f --- /dev/null +++ b/mlir/test/Target/DXSA/operand_imm_components_invalid.mlir @@ -0,0 +1,20 @@ +// RUN: mlir-opt %s -split-input-file -verify-diagnostics + +// OK +%1 = dxsa.operand.imm {imm = dense<1> : vector<1xi32>} + +// expected-error@+1 {{immediate operand should be either 1- or 4- component}} +%2 = dxsa.operand.imm {imm = dense<[1, 2]> : vector<2xi32>} + +// ----- + +// expected-error@+1 {{immediate operand should be either 1- or 4- component}} +%3 = dxsa.operand.imm {imm = dense<[1, 2, 3]> : vector<3xi32>} + +// ----- + +// OK +%4 = dxsa.operand.imm {imm = dense<[1, 2, 3, 4]> : vector<4xi32>} + +// expected-error@+1 {{immediate operand should be either 1- or 4- component}} +%5 = dxsa.operand.imm {imm = dense<[1, 2, 3, 4, 5]> : vector<5xi32>} diff --git a/mlir/test/Target/DXSA/operand_swizzle_invalid.mlir b/mlir/test/Target/DXSA/operand_swizzle_invalid.mlir new file mode 100644 index 000000000000..08eec568f36a --- /dev/null +++ b/mlir/test/Target/DXSA/operand_swizzle_invalid.mlir @@ -0,0 +1,4 @@ +// RUN: mlir-opt %s -verify-diagnostics + +// expected-error@+1 {{invalid number of swizzle values}} +%1 = dxsa.operand {num_components = 4 : i32, swizzle = dense<[0, 1, 2, 3, 4]> : vector<5xi32>, type = 1 : i32} diff --git a/mlir/test/Target/DXSA/ret.mlir b/mlir/test/Target/DXSA/ret.mlir index 24e0ec711ffc..b9b352bcab74 100644 --- a/mlir/test/Target/DXSA/ret.mlir +++ b/mlir/test/Target/DXSA/ret.mlir @@ -1,4 +1,7 @@ // RUN: mlir-translate --import-dxsa-bin %S/inputs/ret.bin | FileCheck %s +// RUN: mlir-translate --export-dxsa-bin %s -o %t.bin +// RUN: mlir-translate --import-dxsa-bin %t.bin | FileCheck %s +// RUN: 
diff %t.bin %S/inputs/ret.bin // CHECK: module { // CHECK-NEXT: dxsa.instruction "ret" diff --git a/mlir/test/Target/DXSA/udiv.mlir b/mlir/test/Target/DXSA/udiv.mlir index cbd535c7df8e..69f13aa71488 100644 --- a/mlir/test/Target/DXSA/udiv.mlir +++ b/mlir/test/Target/DXSA/udiv.mlir @@ -1,4 +1,8 @@ // RUN: mlir-translate --import-dxsa-bin %S/inputs/udiv.bin | FileCheck %s +// RUN: mlir-translate --export-dxsa-bin %s -o %t.bin +// RUN: mlir-translate --import-dxsa-bin %t.bin | FileCheck %s +// RUN: diff %t.bin %S/inputs/udiv.bin + // udiv NULL, r0.x, vOutputControlPointID, 4 // CHECK: module { @@ -9,3 +13,12 @@ // CHECK-NEXT: %4 = dxsa.operand.imm {imm = dense<4> : vector<1xi32>} // CHECK-NEXT: dxsa.instruction "udiv" %0, %2, %3, %4 // CHECK-NEXT: } + +module { + %0 = dxsa.operand {num_components = 0 : i32, type = 13 : i32} + %1 = dxsa.index.imm {imm = 0 : i32} + %2 = dxsa.operand %1 {mask = 16 : i32, num_components = 4 : i32, type = 0 : i32} + %3 = dxsa.operand {num_components = 1 : i32, type = 22 : i32} + %4 = dxsa.operand.imm {imm = dense<4> : vector<1xi32>} + dxsa.instruction "udiv" %0, %2, %3, %4 +}