Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions aiter/configs/model_configs/qwen3_235b_bf16_tuned_fmoe.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
cu_num,token,model_dim,inter_dim,expert,topk,act_type,dtype,q_dtype_a,q_dtype_w,q_type,use_g1u1,doweight_stage1,block_m,ksplit,us1,kernelName1,err1,us2,kernelName2,err2,us,run_1stage,xbf16,tflops,bw,_tag
256,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,25.6519,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,9.2504,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,34.9023,0,0,2.16,34610.21,
256,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,27.5466,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,17.3969,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,44.9435,0,0,3.36,26878.02,
256,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,32.7806,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,22.4269,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,55.2075,0,0,5.47,21881.54,
256,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,58.1181,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,34.3805,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,92.4986,0,0,6.53,13060.64,
256,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,90.0426,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,50.6559,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,140.6985,0,0,8.59,8587.31,
256,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,127.8366,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,68.2598,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,196.0964,0,0,12.32,6162.7,
256,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,136.1702,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,74.8871,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,211.0573,0,0,22.89,5728.34,
256,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,138.0199,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,76.728,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,214.7479,0,0,45.0,5634.78,
256,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,140.4918,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,80.2598,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,220.7516,0,0,87.55,5491.03,
256,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,145.1529,moe_ck2stages_gemm1_256x64x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,89.5603,moe_ck2stages_gemm2_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,234.7132,0,0,164.69,5182.27,
256,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,170.3325,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,121.7016,moe_ck2stages_gemm2_256x128x64x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,292.0341,0,0,264.73,4193.81,
256,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,255.1849,moe_ck2stages_gemm1_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,169.7393,moe_ck2stages_gemm2_256x128x64x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,424.9242,0,0,363.87,2921.73,
256,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,374.4311,moe_ck2stages_gemm1_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,282.9883,moe_ck2stages_gemm2_256x128x64x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,657.4194,0,0,470.38,1939.51,
256,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,688.2845,moe_ck2stages_gemm1_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,502.2363,moe_ck2stages_gemm2_256x128x64x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,1190.5208,0,0,519.5,1127.39,
256,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,1278.7463,moe_ck2stages_gemm1_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,966.9571,moe_ck2stages_gemm2_256x128x64x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,2245.7034,0,0,550.81,657.43,
256,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,2125.7129,moe_ck2stages_gemm1_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,1935.3483,moe_ck2stages_gemm2_256x128x64x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,4061.0612,0,0,609.18,429.65,
17 changes: 17 additions & 0 deletions aiter/configs/model_configs/qwen3_235b_bf16_untuned_fmoe.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
token,model_dim,inter_dim,expert,topk,act_type,dtype,q_dtype_a,q_dtype_w,q_type,use_g1u1,doweight_stage1
1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0
2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0
4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0
8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0
16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0
32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0
64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0
128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0
256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0
512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0
1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0
2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0
4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0
8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0
16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0
32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0
Loading